From 4ed7f029bc21977072cdd7923b9665814266ae57 Mon Sep 17 00:00:00 2001 From: Kostiantyn Goloveshko Date: Wed, 29 Jan 2025 19:30:13 +0200 Subject: [PATCH] Add messages implementation for python (#165) * [python] Add messages implementation for python * [python] Review fixes * Fixup property type definitions * Fixup property descriptions * Descriptions inlined where possible * Property descriptions are placed after properties per se * Remove redundant double-quotes at type definitions * Split enums and model templates * Simplify gh-action test matrix * Fixup empty project.toml settings * Update python/pyproject.toml Co-authored-by: Luke Hill <20105237+luke-hill@users.noreply.github.com> * Update CHANGELOG.md --------- Co-authored-by: Luke Hill <20105237+luke-hill@users.noreply.github.com> Co-authored-by: David Goss --- .github/workflows/release-pypi.yaml | 23 + .github/workflows/test-python.yml | 45 ++ .pre-commit-config.yaml | 38 + CHANGELOG.md | 2 + Makefile | 2 +- codegen/codegen.rb | 1 + codegen/generators/python.rb | 153 ++++ codegen/templates/python.enum.py.erb | 17 + codegen/templates/python.py.erb | 34 + cpp/cmake/cmate | 0 python/.gitignore | 55 ++ python/Makefile | 21 + python/README.md | 3 + python/pyproject.toml | 132 ++++ python/pytest.ini | 2 + python/src/cucumber_messages/__init__.py | 4 + .../src/cucumber_messages/_message_enums.py | 54 ++ python/src/cucumber_messages/_messages.py | 740 ++++++++++++++++++ .../src/cucumber_messages/json_converter.py | 258 ++++++ python/tests/conftest.py | 0 python/tests/test_json_converter.py | 208 +++++ python/tests/test_messages.py | 181 +++++ python/tests/test_model_load.py | 61 ++ 23 files changed, 2033 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/release-pypi.yaml create mode 100644 .github/workflows/test-python.yml create mode 100644 .pre-commit-config.yaml create mode 100644 codegen/generators/python.rb create mode 100644 codegen/templates/python.enum.py.erb create mode 100644 codegen/templates/python.py.erb mode change 100755 => 100644 cpp/cmake/cmate create mode 100644 python/.gitignore create mode 100644 python/Makefile create mode 100644 python/README.md create mode 100644 python/pyproject.toml create mode 100644 python/pytest.ini create mode 100644 python/src/cucumber_messages/__init__.py create mode 100644 python/src/cucumber_messages/_message_enums.py create mode 100644 python/src/cucumber_messages/_messages.py create mode 100644 python/src/cucumber_messages/json_converter.py create mode 100644 python/tests/conftest.py create mode 100644 python/tests/test_json_converter.py create mode 100644 python/tests/test_messages.py create mode 100644 python/tests/test_model_load.py diff --git a/.github/workflows/release-pypi.yaml b/.github/workflows/release-pypi.yaml new file mode 100644 index 00000000..ba7639b5 --- /dev/null +++ b/.github/workflows/release-pypi.yaml @@ -0,0 +1,23 @@ +name: Release Python + +on: + push: + branches: [release/*] + +jobs: + release: + name: Release + runs-on: ubuntu-latest + environment: Release + permissions: + id-token: write + defaults: + run: + working-directory: python + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - uses: cucumber/action-publish-pypi@v3.0.0 + with: + working-directory: "python" diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml new file mode 100644 index 00000000..d6bfa5bd --- /dev/null +++ b/.github/workflows/test-python.yml @@ -0,0 +1,45 @@ +--- +name: test-python + +on: + push: + branches: + - main + - renovate/** 
+ pull_request: + branches: + - main + workflow_dispatch: + +jobs: + build: + + runs-on: ${{ matrix.os }} + strategy: + matrix: + include: + # Test latest python on Windows / macOS + - { os: 'windows-latest', python-version: '3.13' } + - { os: 'macos-latest', python-version: '3.13' } + os: ['ubuntu-latest'] + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', 'pypy3.9', 'pypy3.10'] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -U setuptools + pip install tox tox-gh-actions codecov + - name: Test with tox + working-directory: ./python + run: | + tox + - name: Gather codecov report + working-directory: ./python + run: | + codecov diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..19190587 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,38 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +--- +files: ^python/ +exclude: .*python/src/cucumber_messages/_messages\.py +repos: + - repo: https://github.com/psf/black + rev: 24.10.0 + hooks: + - id: black + args: + - "python/src" + - "python/tests" + - repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-added-large-files + - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks + rev: v2.14.0 + hooks: + - id: pretty-format-toml + args: [--autofix] + - repo: https://github.com/asottile/pyupgrade + rev: v3.19.1 + hooks: + - id: pyupgrade + args: ["--py39-plus"] + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.13.0 + hooks: + - id: mypy + additional_dependencies: [types-setuptools, types-certifi] diff --git a/CHANGELOG.md b/CHANGELOG.md index e2c9474d..b1c70940 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). 
## [Unreleased] +### Added +- [python] Added Python implementation ([#165](https://github.com/cucumber/messages/pull/165)) ## [27.1.0] - 2025-01-28 ### Added diff --git a/Makefile b/Makefile index 297f25ec..cf95cf24 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ schemas = \ ./jsonschema/UndefinedParameterType.json \ ./jsonschema/Envelope.json -languages = cpp go java javascript perl php ruby dotnet +languages = cpp dotnet go java javascript perl php python ruby .DEFAULT_GOAL = help diff --git a/codegen/codegen.rb b/codegen/codegen.rb index c484dd63..c3afd83d 100644 --- a/codegen/codegen.rb +++ b/codegen/codegen.rb @@ -11,6 +11,7 @@ require_relative 'generators/markdown' require_relative 'generators/perl' require_relative 'generators/php' +require_relative 'generators/python' require_relative 'generators/ruby' require_relative 'generators/typescript' diff --git a/codegen/generators/python.rb b/codegen/generators/python.rb new file mode 100644 index 00000000..9a84deea --- /dev/null +++ b/codegen/generators/python.rb @@ -0,0 +1,153 @@ +# frozen_string_literal: true + +module Generator + class Python < Base + def format_enum_value(value) + value.downcase.gsub(/[.\/+\s-]/, '_') + end + + def get_sorted_properties(definition) + required_fields = definition['required'] || [] + definition['properties'].sort_by do |name, *| + [required_fields.include?(name) ? 0 : 1, name] + end + end + + def format_property(parent_type_name, property_name, property, required_fields) + snake_name = property_name.gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2') + .gsub(/([a-z\d])([A-Z])/, '\1_\2') + .downcase + + property_type = get_property_type(parent_type_name, property_name, property) + is_required = required_fields.include?(property_name) + + property_description = if property['description'] && !property['description'].include?("\n") + " # #{property['description']}" + else + '' + end + if is_required + "#{snake_name}: #{property_type}#{property_description}" + else + "#{snake_name}: Optional[#{property_type}] = None#{property_description}" + end + end + + def get_property_type(parent_type_name, property_name, property) + type = type_for(parent_type_name, property_name, property) + type.match?(/\A[A-Z]/) ? class_name(type) : type + end + + def array_type_for(type_name) + inner_type = if language_translations_for_data_types.values.include?(type_name) + type_name # Keep primitive types as is + else + class_name(type_name) # CamelCase for complex types + end + inner_type + end + + def format_description(raw_description, indent_string: ' ') + return '""" """' if raw_description.nil? + + lines = raw_description.split("\n").map { |line| + if line.strip.empty? 
+ "" + else + "#{indent_string}#{line.rstrip}" + end + } + + %("""\n#{lines.join("\n")}\n#{indent_string}""") + end + + def language_translations_for_data_types + { + 'integer' => 'int', + 'string' => 'str', + 'boolean' => 'bool', + 'array' => 'list' + } + end + + private + + def default_value(parent_type_name, property_name, property) + if property['type'] == 'string' + default_value_for_string(parent_type_name, property_name, property) + elsif property['type'] == 'integer' + '0' + elsif property['type'] == 'boolean' + 'False' + elsif property['type'] == 'array' + '[]' + elsif property['$ref'] + "#{class_name(type_for(parent_type_name, nil, property))}()" + else + 'None' + end + end + + def default_value_for_string(parent_type_name, property_name, property) + if property['enum'] + enum_type_name = type_for(parent_type_name, property_name, property) + "#{class_name(enum_type_name)}.#{enum_constant(property['enum'][0])}" + else + '""' + end + end + + def type_for(parent_type_name, property_name, property) + if property['$ref'] + property_type_from_ref(property['$ref']) + elsif property['type'] + property_type_from_type(parent_type_name, property_name, property, type: property['type']) + else + raise "Property #{property_name} did not define 'type' or '$ref'" + end + end + + def property_type_from_type(parent_type_name, property_name, property, type:) + if type == 'array' + type = type_for(parent_type_name, nil, property['items']) + inner_type = array_type_for(type) + "list[#{inner_type}]" + elsif property['enum'] + enum_name(parent_type_name, property_name, property['enum']) + else + language_translations_for_data_types.fetch(type) + end + end + + def enum_constant(value) + value.gsub(/[.\/+]/, '_').downcase + end + + def enum_name(parent_type_name, property_name, enum) + "#{class_name(parent_type_name)}#{capitalize(property_name)}".tap do |name| + @enum_set.add({ name: name, values: enum }) + end + end + + def property_type_from_ref(ref) + class_name(ref) + end + + def class_name(ref) + return ref if language_translations_for_data_types.values.include?(ref) + + # Remove .json extension if present + name = ref.sub(/\.json$/, '') + # Get the basename without path + name = File.basename(name) + # Convert each word to proper case, handling camelCase and snake_case + parts = name.gsub(/[._-]/, '_').split('_').map do |part| + # Split by any existing camelCase + subparts = part.scan(/[A-Z][a-z]*|[a-z]+/) + subparts.map(&:capitalize).join + end + # Join all parts to create final CamelCase name + parts.join + end + end +end \ No newline at end of file diff --git a/codegen/templates/python.enum.py.erb b/codegen/templates/python.enum.py.erb new file mode 100644 index 00000000..95432b6f --- /dev/null +++ b/codegen/templates/python.enum.py.erb @@ -0,0 +1,17 @@ +# This code was generated using the code generator from cucumber-messages. +# Manual changes will be lost if the code is regenerated. +# Generator: cucumber-messages-python + +from enum import Enum + + +<%- @enums.each_with_index do |enum, index| -%> +class <%= enum[:name] %>(Enum): + <%- enum[:values].each do |value| -%> + <%= format_enum_value(value) %> = "<%= value %>" + <%- end -%> +<%- if index < @enums.length - 1 -%> + + +<%- end -%> +<%- end -%> diff --git a/codegen/templates/python.py.erb b/codegen/templates/python.py.erb new file mode 100644 index 00000000..49da4502 --- /dev/null +++ b/codegen/templates/python.py.erb @@ -0,0 +1,34 @@ +# This code was generated using the code generator from cucumber-messages. 
+# Manual changes will be lost if the code is regenerated. +# Generator: cucumber-messages-python + +from __future__ import annotations +from dataclasses import dataclass +from typing import Optional + +from ._message_enums import * + +<%- @schemas.each_with_index do |schema_pair, index| -%> +<%- key, definition = schema_pair -%> +@dataclass +class <%= class_name(key) %>: + <%- if definition['description'] -%> + <%= format_description(definition['description']) %> + <%- end -%> + <%- if definition['properties'].any? -%> + <%- required_fields = definition['required'] || [] -%> + <%- get_sorted_properties(definition).each do |property_name, property| -%> + <%= format_property(key, property_name, property, required_fields) %> + <%- if property['description'] && property['description'].include?("\n") -%> + <%= format_description(property['description']) %> + + <%- end -%> + <%- end -%> + <%- else -%> + pass + <%- end -%> +<%- if index < @schemas.length - 1 -%> + + +<%- end -%> +<%- end -%> diff --git a/cpp/cmake/cmate b/cpp/cmake/cmate old mode 100755 new mode 100644 diff --git a/python/.gitignore b/python/.gitignore new file mode 100644 index 00000000..563dcc08 --- /dev/null +++ b/python/.gitignore @@ -0,0 +1,55 @@ +*.rej +*.py[cod] +/.env +*.orig +**/__pycache__ + +# C extensions +*.so + +# Packages +*.egg +*.egg-info +dist +build +_build +eggs +parts +bin +var +sdist +develop-eggs +.installed.cfg +lib +lib64 + +# Installer logs +pip-log.txt + +# Unit test / coverage reports +.coverage +.tox +nosetests.xml + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject +.pytest_cache +.ropeproject + +# Sublime +/*.sublime-* + +#PyCharm +/.idea + +# virtualenv +/.Python +/lib +/include +/share +/local diff --git a/python/Makefile b/python/Makefile new file mode 100644 index 00000000..55a10f2a --- /dev/null +++ b/python/Makefile @@ -0,0 +1,21 @@ +schemas = $(shell find ../jsonschema -name "*.json") + +.DEFAULT_GOAL = help + +help: ## Show this help + @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \n\nWhere is one of:\n"} /^[$$()% a-zA-Z_-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) + +generate: require src/cucumber_messages/_messages.py src/cucumber_messages/_message_enums.py + +require: ## Check requirements for the code generation (ruby is required) + @ruby --version >/dev/null 2>&1 || (echo "ERROR: ruby is required."; exit 1) + +clean: ## Remove automatically generated files and related artifacts + rm -f src/cucumber_messages/_messages.py + rm -f src/cucumber_messages/_message_enums.py + +src/cucumber_messages/_messages.py: $(schemas) ../codegen/codegen.rb ../codegen/templates/python.py.erb + ruby ../codegen/codegen.rb Generator::Python python.py.erb > $@ + +src/cucumber_messages/_message_enums.py: $(schemas) ../codegen/codegen.rb ../codegen/templates/python.enum.py.erb + ruby ../codegen/codegen.rb Generator::Python python.enum.py.erb > $@ diff --git a/python/README.md b/python/README.md new file mode 100644 index 00000000..0a7e6d51 --- /dev/null +++ b/python/README.md @@ -0,0 +1,3 @@ +# Messages + +Cucumber Messages for Python https://github.com/cucumber/messages diff --git a/python/pyproject.toml b/python/pyproject.toml new file mode 100644 index 00000000..7fd36d81 --- /dev/null +++ b/python/pyproject.toml @@ -0,0 +1,132 @@ +[build-system] +build-backend = "setuptools.build_meta" +requires = ["setuptools>=61.0", "wheel"] + +[project] +authors = [ + {name = "Cucumber 
Limited", email = "cukes@googlegroups.com"} +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Operating System :: POSIX", + "Operating System :: Microsoft :: Windows", + "Operating System :: MacOS :: MacOS X", + "Topic :: Software Development :: Testing", + "Topic :: Utilities", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13" +] +dependencies = [] +description = "Cucumber Messages is a message protocol for representing results and other information from Cucumber. " +license = {text = "MIT"} +maintainers = [ + {name = "Konstantin Goloveshko", email = "kostya.goloveshko@gmail.com"} +] +name = "cucumber-messages" +readme = {file = "README.md", content-type = "text/markdown"} +requires-python = ">=3.9" +urls = {Repository = "https://github.com/cucumber/messages"} +version = "0.1.0" + +[project.optional-dependencies] +test = [ + # local + "cucumber-messages[test-coverage]", + # external; Must be in sync with [tool.tox] + "mypy", + "pre-commit", + "tox>=4.2" +] +test-coverage = [ + "coverage", + "GitPython", + "packaging", + "pytest" +] + +[tool.black] +# Don't include autogenerated file +force-exclude = ".*\\/src\\/cucumber_messages\\/_messages\\.py" +line-length = 120 +target-version = ["py39", "py310", "py311", "py312", "py313"] +verbose = true + +[tool.isort] +line_length = 120 +multi_line_output = 3 +profile = "black" + +[tool.mypy] +files = "src/**/*.py" +install_types = true +non_interactive = true +plugins = [] +show_error_codes = true +warn_return_any = true +warn_unused_configs = true + +[[tool.mypy.overrides]] +ignore_missing_imports = true +module = [ +] + +# Once https://github.com/tox-dev/tox/issues/999 is released and available, migrate to the new tox approach +[tool.tox] +# language=INI +legacy_tox_ini = """ +[tox] +requires = + tox>=4.2 +env_list = + py313-pre-commit-lin + py{313, 312, 311, 310, 39}-mypy-lin + py{py310, py39, 313, 312, 311, 310, 39}-pytest-coverage-lin + py313-pytest-coverage-{win, mac} +distshare = {homedir}/.tox/distshare + +[testenv] +platform = + lin: linux + mac: darwin + win: win32 + +[testenv:py313-pre-commit-lin] +skip_install = true +deps = + pre-commit +commands = + pre-commit run + +[testenv:py{313, 312, 311, 310, 39}-mypy-lin] +deps = + mypy +allowlist_externals = + mkdir + chmod +commands = + mkdir .mypy_cache + chmod 755 .mypy_cache + python -m mypy --cache-dir .mypy_cache + +[testenv:py{py310, py39, 313, 312, 311, 310, 39}-pytest-coverage-{lin, win, mac}] +deps = + .[test-coverage] +commands = + coverage run --append -m pytest -vvl + +[gh-actions] +python = + 3.9: py39 + 3.10: py310 + 3.11: py311 + 3.12: py312 + 3.13: py313 + pypy-3.9: pypy39 + pypy-3.10: pypy310 +""" diff --git a/python/pytest.ini b/python/pytest.ini new file mode 100644 index 00000000..5ee64771 --- /dev/null +++ b/python/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +testpaths = tests diff --git a/python/src/cucumber_messages/__init__.py b/python/src/cucumber_messages/__init__.py new file mode 100644 index 00000000..8d35298f --- /dev/null +++ b/python/src/cucumber_messages/__init__.py @@ -0,0 +1,4 @@ +from . 
import _messages, json_converter +from ._messages import * + +message_converter: json_converter.JsonDataclassConverter = json_converter.JsonDataclassConverter(module_scope=_messages) diff --git a/python/src/cucumber_messages/_message_enums.py b/python/src/cucumber_messages/_message_enums.py new file mode 100644 index 00000000..80d33586 --- /dev/null +++ b/python/src/cucumber_messages/_message_enums.py @@ -0,0 +1,54 @@ +# This code was generated using the code generator from cucumber-messages. +# Manual changes will be lost if the code is regenerated. +# Generator: cucumber-messages-python + +from enum import Enum + + +class AttachmentContentEncoding(Enum): + identity = "IDENTITY" + base64 = "BASE64" + + +class HookType(Enum): + before_test_run = "BEFORE_TEST_RUN" + after_test_run = "AFTER_TEST_RUN" + before_test_case = "BEFORE_TEST_CASE" + after_test_case = "AFTER_TEST_CASE" + before_test_step = "BEFORE_TEST_STEP" + after_test_step = "AFTER_TEST_STEP" + + +class PickleStepType(Enum): + unknown = "Unknown" + context = "Context" + action = "Action" + outcome = "Outcome" + + +class SourceMediaType(Enum): + text_x_cucumber_gherkin_plain = "text/x.cucumber.gherkin+plain" + text_x_cucumber_gherkin_markdown = "text/x.cucumber.gherkin+markdown" + + +class StepDefinitionPatternType(Enum): + cucumber_expression = "CUCUMBER_EXPRESSION" + regular_expression = "REGULAR_EXPRESSION" + + +class StepKeywordType(Enum): + unknown = "Unknown" + context = "Context" + action = "Action" + outcome = "Outcome" + conjunction = "Conjunction" + + +class TestStepResultStatus(Enum): + unknown = "UNKNOWN" + passed = "PASSED" + skipped = "SKIPPED" + pending = "PENDING" + undefined = "UNDEFINED" + ambiguous = "AMBIGUOUS" + failed = "FAILED" diff --git a/python/src/cucumber_messages/_messages.py b/python/src/cucumber_messages/_messages.py new file mode 100644 index 00000000..1de36576 --- /dev/null +++ b/python/src/cucumber_messages/_messages.py @@ -0,0 +1,740 @@ +# This code was generated using the code generator from cucumber-messages. +# Manual changes will be lost if the code is regenerated. +# Generator: cucumber-messages-python + +from __future__ import annotations +from dataclasses import dataclass +from typing import Optional + +from ._message_enums import * + +@dataclass +class Attachment: + """ + //// Attachments (parse errors, execution errors, screenshots, links...) + + * + An attachment represents any kind of data associated with a line in a + [Source](#io.cucumber.messages.Source) file. It can be used for: + + * Syntax errors during parse time + * Screenshots captured and attached during execution + * Logs captured and attached during execution + + It is not to be used for runtime errors raised/thrown during execution. This + is captured in `TestResult`. + """ + body: str + """ + * + The body of the attachment. If `contentEncoding` is `IDENTITY`, the attachment + is simply the string. If it's `BASE64`, the string should be Base64 decoded to + obtain the attachment. + """ + + content_encoding: AttachmentContentEncoding + """ + * + Whether to interpret `body` "as-is" (IDENTITY) or if it needs to be Base64-decoded (BASE64). + + Content encoding is *not* determined by the media type, but rather by the type + of the object being attached: + + - string: IDENTITY + - byte array: BASE64 + - stream: BASE64 + """ + + media_type: str + """ + * + The media type of the data. 
This can be any valid + [IANA Media Type](https://www.iana.org/assignments/media-types/media-types.xhtml) + as well as Cucumber-specific media types such as `text/x.cucumber.gherkin+plain` + and `text/x.cucumber.stacktrace+plain` + """ + + file_name: Optional[str] = None + """ + * + Suggested file name of the attachment. (Provided by the user as an argument to `attach`) + """ + + source: Optional[Source] = None + test_case_started_id: Optional[str] = None + test_run_started_id: Optional[str] = None + test_step_id: Optional[str] = None + url: Optional[str] = None + """ + * + A URL where the attachment can be retrieved. This field should not be set by Cucumber. + It should be set by a program that reads a message stream and does the following for + each Attachment message: + + - Writes the body (after base64 decoding if necessary) to a new file. + - Sets `body` and `contentEncoding` to `null` + - Writes out the new attachment message + + This will result in a smaller message stream, which can improve performance and + reduce bandwidth of message consumers. It also makes it easier to process and download attachments + separately from reports. + """ + + + +@dataclass +class Duration: + """ + The structure is pretty close of the Timestamp one. For clarity, a second type + of message is used. + """ + nanos: int + """ + Non-negative fractions of a second at nanosecond resolution. Negative + second values with fractions must still have non-negative nanos values + that count forward in time. Must be from 0 to 999,999,999 + inclusive. + """ + + seconds: int + + +@dataclass +class Envelope: + """ + When removing a field, replace it with reserved, rather than deleting the line. + When adding a field, add it to the end and increment the number by one. + See https://developers.google.com/protocol-buffers/docs/proto#updating for details + + * + All the messages that are passed between different components/processes are Envelope + messages. + """ + attachment: Optional[Attachment] = None + gherkin_document: Optional[GherkinDocument] = None + hook: Optional[Hook] = None + meta: Optional[Meta] = None + parameter_type: Optional[ParameterType] = None + parse_error: Optional[ParseError] = None + pickle: Optional[Pickle] = None + source: Optional[Source] = None + step_definition: Optional[StepDefinition] = None + test_case: Optional[TestCase] = None + test_case_finished: Optional[TestCaseFinished] = None + test_case_started: Optional[TestCaseStarted] = None + test_run_finished: Optional[TestRunFinished] = None + test_run_hook_finished: Optional[TestRunHookFinished] = None + test_run_hook_started: Optional[TestRunHookStarted] = None + test_run_started: Optional[TestRunStarted] = None + test_step_finished: Optional[TestStepFinished] = None + test_step_started: Optional[TestStepStarted] = None + undefined_parameter_type: Optional[UndefinedParameterType] = None + + +@dataclass +class Exception: + """ + A simplified representation of an exception + """ + type: str # The type of the exception that caused this result. E.g. "Error" or "org.opentest4j.AssertionFailedError" + message: Optional[str] = None # The message of exception that caused this result. E.g. expected: "a" but was: "b" + stack_trace: Optional[str] = None # The stringified stack trace of the exception that caused this result + + +@dataclass +class GherkinDocument: + """ + * + The [AST](https://en.wikipedia.org/wiki/Abstract_syntax_tree) of a Gherkin document. 
+ Cucumber implementations should *not* depend on `GherkinDocument` or any of its + children for execution - use [Pickle](#io.cucumber.messages.Pickle) instead. + + The only consumers of `GherkinDocument` should only be formatters that produce + "rich" output, resembling the original Gherkin document. + """ + comments: list[Comment] # All the comments in the Gherkin document + feature: Optional[Feature] = None + uri: Optional[str] = None + """ + * + The [URI](https://en.wikipedia.org/wiki/Uniform_Resource_Identifier) + of the source, typically a file path relative to the root directory + """ + + + +@dataclass +class Background: + description: str + id: str + keyword: str + location: Location # The location of the `Background` keyword + name: str + steps: list[Step] + + +@dataclass +class Comment: + """ + * + A comment in a Gherkin document + """ + location: Location # The location of the comment + text: str # The text of the comment + + +@dataclass +class DataTable: + location: Location + rows: list[TableRow] + + +@dataclass +class DocString: + content: str + delimiter: str + location: Location + media_type: Optional[str] = None + + +@dataclass +class Examples: + description: str + id: str + keyword: str + location: Location # The location of the `Examples` keyword + name: str + table_body: list[TableRow] + tags: list[Tag] + table_header: Optional[TableRow] = None + + +@dataclass +class Feature: + children: list[FeatureChild] # Zero or more children + description: str # The line(s) underneath the line with the `keyword` that are used as description + keyword: str # The text of the `Feature` keyword (in the language specified by `language`) + language: str # The [ISO 639-1](https://en.wikipedia.org/wiki/ISO_639-1) language code of the Gherkin document + location: Location # The location of the `Feature` keyword + name: str # The name of the feature (the text following the `keyword`) + tags: list[Tag] # All the tags placed above the `Feature` keyword + + +@dataclass +class FeatureChild: + """ + * + A child node of a `Feature` node + """ + background: Optional[Background] = None + rule: Optional[Rule] = None + scenario: Optional[Scenario] = None + + +@dataclass +class Rule: + children: list[RuleChild] + description: str + id: str + keyword: str + location: Location # The location of the `Rule` keyword + name: str + tags: list[Tag] # All the tags placed above the `Rule` keyword + + +@dataclass +class RuleChild: + """ + * + A child node of a `Rule` node + """ + background: Optional[Background] = None + scenario: Optional[Scenario] = None + + +@dataclass +class Scenario: + description: str + examples: list[Examples] + id: str + keyword: str + location: Location # The location of the `Scenario` keyword + name: str + steps: list[Step] + tags: list[Tag] + + +@dataclass +class Step: + """ + A step + """ + id: str # Unique ID to be able to reference the Step from PickleStep + keyword: str # The actual keyword as it appeared in the source. + location: Location # The location of the steps' `keyword` + text: str + data_table: Optional[DataTable] = None + doc_string: Optional[DocString] = None + keyword_type: Optional[StepKeywordType] = None # The test phase signalled by the keyword: Context definition (Given), Action performance (When), Outcome assertion (Then). Other keywords signal Continuation (And and But) from a prior keyword. Please note that all translations which a dialect maps to multiple keywords (`*` is in this category for all dialects), map to 'Unknown'. 
+ + +@dataclass +class TableCell: + """ + A cell in a `TableRow` + """ + location: Location # The location of the cell + value: str # The value of the cell + + +@dataclass +class TableRow: + """ + A row in a table + """ + cells: list[TableCell] # Cells in the row + id: str + location: Location # The location of the first cell in the row + + +@dataclass +class Tag: + """ + * + A tag + """ + id: str # Unique ID to be able to reference the Tag from PickleTag + location: Location # Location of the tag + name: str # The name of the tag (including the leading `@`) + + +@dataclass +class Hook: + id: str + source_reference: SourceReference + name: Optional[str] = None + tag_expression: Optional[str] = None + type: Optional[HookType] = None + + +@dataclass +class Location: + """ + * + Points to a line and a column in a text file + """ + line: int + column: Optional[int] = None + + +@dataclass +class Meta: + """ + * + This message contains meta information about the environment. Consumers can use + this for various purposes. + """ + cpu: Product # 386, arm, amd64 etc + implementation: Product # SpecFlow, Cucumber-JVM, Cucumber.js, Cucumber-Ruby, Behat etc. + os: Product # Windows, Linux, MacOS etc + protocol_version: str + """ + * + The [SEMVER](https://semver.org/) version number of the protocol + """ + + runtime: Product # Java, Ruby, Node.js etc + ci: Optional[Ci] = None + + +@dataclass +class Ci: + """ + CI environment + """ + name: str # Name of the CI product, e.g. "Jenkins", "CircleCI" etc. + build_number: Optional[str] = None # The build number. Some CI servers use non-numeric build numbers, which is why this is a string + git: Optional[Git] = None + url: Optional[str] = None # Link to the build + + +@dataclass +class Git: + """ + Information about Git, provided by the Build/CI server as environment + variables. + """ + remote: str + revision: str + branch: Optional[str] = None + tag: Optional[str] = None + + +@dataclass +class Product: + """ + Used to describe various properties of Meta + """ + name: str # The product name + version: Optional[str] = None # The product version + + +@dataclass +class ParameterType: + id: str + name: str # The name is unique, so we don't need an id. + prefer_for_regular_expression_match: bool + regular_expressions: list[str] + use_for_snippets: bool + source_reference: Optional[SourceReference] = None + + +@dataclass +class ParseError: + message: str + source: SourceReference + + +@dataclass +class Pickle: + """ + //// Pickles + + * + A `Pickle` represents a template for a `TestCase`. It is typically derived + from another format, such as [GherkinDocument](#io.cucumber.messages.GherkinDocument). + In the future a `Pickle` may be derived from other formats such as Markdown or + Excel files. + + By making `Pickle` the main data structure Cucumber uses for execution, the + implementation of Cucumber itself becomes simpler, as it doesn't have to deal + with the complex structure of a [GherkinDocument](#io.cucumber.messages.GherkinDocument). + + Each `PickleStep` of a `Pickle` is matched with a `StepDefinition` to create a `TestCase` + """ + ast_node_ids: list[str] + """ + * + Points to the AST node locations of the pickle. The last one represents the unique + id of the pickle. A pickle constructed from `Examples` will have the first + id originating from the `Scenario` AST node, and the second from the `TableRow` AST node. 
+ """ + + id: str + """ + * + A unique id for the pickle + """ + + language: str # The language of the pickle + name: str # The name of the pickle + steps: list[PickleStep] # One or more steps + tags: list[PickleTag] + """ + * + One or more tags. If this pickle is constructed from a Gherkin document, + It includes inherited tags from the `Feature` as well. + """ + + uri: str # The uri of the source file + + +@dataclass +class PickleDocString: + content: str + media_type: Optional[str] = None + + +@dataclass +class PickleStep: + """ + * + An executable step + """ + ast_node_ids: list[str] + """ + References the IDs of the source of the step. For Gherkin, this can be + the ID of a Step, and possibly also the ID of a TableRow + """ + + id: str # A unique ID for the PickleStep + text: str + argument: Optional[PickleStepArgument] = None + type: Optional[PickleStepType] = None + """ + The context in which the step was specified: context (Given), action (When) or outcome (Then). + + Note that the keywords `But` and `And` inherit their meaning from prior steps and the `*` 'keyword' doesn't have specific meaning (hence Unknown) + """ + + + +@dataclass +class PickleStepArgument: + """ + An optional argument + """ + data_table: Optional[PickleTable] = None + doc_string: Optional[PickleDocString] = None + + +@dataclass +class PickleTable: + rows: list[PickleTableRow] + + +@dataclass +class PickleTableCell: + value: str + + +@dataclass +class PickleTableRow: + cells: list[PickleTableCell] + + +@dataclass +class PickleTag: + """ + * + A tag + """ + ast_node_id: str # Points to the AST node this was created from + name: str + + +@dataclass +class Source: + """ + //// Source + + * + A source file, typically a Gherkin document or Java/Ruby/JavaScript source code + """ + data: str # The contents of the file + media_type: SourceMediaType + """ + The media type of the file. Can be used to specify custom types, such as + text/x.cucumber.gherkin+plain + """ + + uri: str + """ + * + The [URI](https://en.wikipedia.org/wiki/Uniform_Resource_Identifier) + of the source, typically a file path relative to the root directory + """ + + + +@dataclass +class SourceReference: + """ + * + Points to a [Source](#io.cucumber.messages.Source) identified by `uri` and a + [Location](#io.cucumber.messages.Location) within that file. + """ + java_method: Optional[JavaMethod] = None + java_stack_trace_element: Optional[JavaStackTraceElement] = None + location: Optional[Location] = None + uri: Optional[str] = None + + +@dataclass +class JavaMethod: + class_name: str + method_name: str + method_parameter_types: list[str] + + +@dataclass +class JavaStackTraceElement: + class_name: str + file_name: str + method_name: str + + +@dataclass +class StepDefinition: + id: str + pattern: StepDefinitionPattern + source_reference: SourceReference + + +@dataclass +class StepDefinitionPattern: + source: str + type: StepDefinitionPatternType + + +@dataclass +class TestCase: + """ + //// TestCases + + * + A `TestCase` contains a sequence of `TestStep`s. + """ + id: str + pickle_id: str # The ID of the `Pickle` this `TestCase` is derived from. + test_steps: list[TestStep] + test_run_started_id: Optional[str] = None # Identifier for the test run that this test case belongs to + + +@dataclass +class Group: + children: list[Group] + start: Optional[int] = None + value: Optional[str] = None + + +@dataclass +class StepMatchArgument: + """ + * + Represents a single argument extracted from a step match and passed to a step definition. 
+ This is used for the following purposes: + - Construct an argument to pass to a step definition (possibly through a parameter type transform) + - Highlight the matched parameter in rich formatters such as the HTML formatter + + This message closely matches the `Argument` class in the `cucumber-expressions` library. + """ + group: Group + """ + * + Represents the outermost capture group of an argument. This message closely matches the + `Group` class in the `cucumber-expressions` library. + """ + + parameter_type_name: Optional[str] = None + + +@dataclass +class StepMatchArgumentsList: + step_match_arguments: list[StepMatchArgument] + + +@dataclass +class TestStep: + """ + * + A `TestStep` is derived from either a `PickleStep` + combined with a `StepDefinition`, or from a `Hook`. + """ + id: str + hook_id: Optional[str] = None # Pointer to the `Hook` (if derived from a Hook) + pickle_step_id: Optional[str] = None # Pointer to the `PickleStep` (if derived from a `PickleStep`) + step_definition_ids: Optional[list[str]] = None + """ + Pointer to all the matching `StepDefinition`s (if derived from a `PickleStep`) + Each element represents a matching step definition. A size of 0 means `UNDEFINED`, + and a size of 2+ means `AMBIGUOUS` + """ + + step_match_arguments_lists: Optional[list[StepMatchArgumentsList]] = None # A list of list of StepMatchArgument (if derived from a `PickleStep`). + + +@dataclass +class TestCaseFinished: + test_case_started_id: str + timestamp: Timestamp + will_be_retried: bool + + +@dataclass +class TestCaseStarted: + attempt: int + """ + * + The first attempt should have value 0, and for each retry the value + should increase by 1. + """ + + id: str + """ + * + Because a `TestCase` can be run multiple times (in case of a retry), + we use this field to group messages relating to the same attempt. + """ + + test_case_id: str + timestamp: Timestamp + worker_id: Optional[str] = None # An identifier for the worker process running this test case, if test cases are being run in parallel. The identifier will be unique per worker, but no particular format is defined - it could be an index, uuid, machine name etc - and as such should be assumed that it's not human readable. + + +@dataclass +class TestRunFinished: + success: bool # A test run is successful if all steps are either passed or skipped, all before/after hooks passed and no other exceptions where thrown. + timestamp: Timestamp # Timestamp when the TestRun is finished + exception: Optional[Exception] = None # Any exception thrown during the test run, if any. Does not include exceptions thrown while executing steps. + message: Optional[str] = None # An informative message about the test run. Typically additional information about failure, but not necessarily. 
+ test_run_started_id: Optional[str] = None + + +@dataclass +class TestRunHookFinished: + result: TestStepResult + test_run_hook_started_id: str # Identifier for the hook execution that has finished + timestamp: Timestamp + + +@dataclass +class TestRunHookStarted: + hook_id: str # Identifier for the hook that will be executed + id: str # Unique identifier for this hook execution + test_run_started_id: str # Identifier for the test run that this hook execution belongs to + timestamp: Timestamp + + +@dataclass +class TestRunStarted: + timestamp: Timestamp + id: Optional[str] = None + + +@dataclass +class TestStepFinished: + test_case_started_id: str + test_step_id: str + test_step_result: TestStepResult + timestamp: Timestamp + + +@dataclass +class TestStepResult: + duration: Duration + status: TestStepResultStatus + exception: Optional[Exception] = None # Exception thrown while executing this step, if any. + message: Optional[str] = None # An arbitrary bit of information that explains this result. This can be a stack trace of anything else. + + +@dataclass +class TestStepStarted: + test_case_started_id: str + test_step_id: str + timestamp: Timestamp + + +@dataclass +class Timestamp: + nanos: int + """ + Non-negative fractions of a second at nanosecond resolution. Negative + second values with fractions must still have non-negative nanos values + that count forward in time. Must be from 0 to 999,999,999 + inclusive. + """ + + seconds: int + """ + Represents seconds of UTC time since Unix epoch + 1970-01-01T00:00:00Z. Must be from 0001-01-01T00:00:00Z to + 9999-12-31T23:59:59Z inclusive. + """ + + + +@dataclass +class UndefinedParameterType: + expression: str + name: str diff --git a/python/src/cucumber_messages/json_converter.py b/python/src/cucumber_messages/json_converter.py new file mode 100644 index 00000000..5158e729 --- /dev/null +++ b/python/src/cucumber_messages/json_converter.py @@ -0,0 +1,258 @@ +import collections.abc +import types +from dataclasses import MISSING, fields, is_dataclass +from datetime import date, datetime +from enum import Enum +from types import GenericAlias +from typing import Any, Optional, Union, cast, get_args, get_origin + + +def camel_to_snake(s: str) -> str: + """Convert a camelCase string to snake_case.""" + if not s: + return s + result = [s[0].lower()] + for char in s[1:]: + if char.isupper(): + result.extend(["_", char.lower()]) + else: + result.append(char) + return "".join(result) + + +def snake_to_camel(s: str) -> str: + """Convert a snake_case string to camelCase.""" + if not s: + return s + components = s.split("_") + return components[0] + "".join(x.title() for x in components[1:]) + + +class TypeResolver: + """Resolves type hints to their concrete types using the module scope.""" + + def __init__(self, module_scope: types.ModuleType): + self.module_scope = module_scope + self._type_cache: dict[str, Any] = {} + + def _resolve_forward_ref(self, type_hint: Any) -> Any: + """Handle forward reference types.""" + if not hasattr(type_hint, "__forward_arg__"): + return None + forward_arg = type_hint.__forward_arg__ + return getattr(self.module_scope, forward_arg, Any) + + def _resolve_union(self, type_hint: Any) -> Any: + """Handle Union types including Optional.""" + args = get_args(type_hint) + resolved_args = tuple(self.resolve_type(arg) for arg in args) + return Union[resolved_args] + + def _resolve_string_optional(self, type_str: str) -> Any: + """Handle Optional types defined as strings.""" + if not type_str.startswith("Optional["): + return None 
+ inner_type = type_str[9:-1].strip("'\"") + resolved_inner = self.resolve_type(inner_type) + return Union[resolved_inner, type(None)] + + def _resolve_generic_collection(self, type_str: str) -> Any: + """Handle generic collection types like List[T] and Dict[K, V].""" + if "[" not in type_str: + return None + + base_type, inner = type_str.split("[", 1) + inner = inner.rstrip("]").strip() + + if base_type.lower() in {"list", "sequence"}: + resolved_type = self.resolve_type(inner.strip("'\"")) + return GenericAlias(list, (resolved_type,)) + + if base_type.lower() == "dict": + key_type_str, value_type_str = map(str.strip, inner.split(",", 1)) + resolved_key = self.resolve_type(key_type_str.strip("'\"")) + resolved_value = self.resolve_type(value_type_str.strip("'\"")) + return GenericAlias(dict, (resolved_key, resolved_value)) + + return Any + + def resolve_type(self, type_hint: Any) -> Any: + """Resolve a type hint to its concrete type.""" + if isinstance(type_hint, type): + return type_hint + + resolved_forward = self._resolve_forward_ref(type_hint) + if resolved_forward is not None: + return resolved_forward + + if get_origin(type_hint) is Union: + return self._resolve_union(type_hint) + + if not isinstance(type_hint, str): + return type_hint + + if type_hint in self._type_cache: + return self._type_cache[type_hint] + + clean_hint = type_hint.strip("'\"") + + resolved_optional = self._resolve_string_optional(clean_hint) + if resolved_optional is not None: + self._type_cache[type_hint] = resolved_optional + return resolved_optional + + resolved_collection = self._resolve_generic_collection(clean_hint) + if resolved_collection is not None: + self._type_cache[type_hint] = resolved_collection + return resolved_collection + + if hasattr(self.module_scope, clean_hint): + resolved = getattr(self.module_scope, clean_hint) + if isinstance(resolved, type): + self._type_cache[type_hint] = resolved + return resolved + + return Any + + +class JsonDataclassConverter: + def __init__(self, module_scope: types.ModuleType): + self.type_resolver = TypeResolver(module_scope) + + def _convert_datetime(self, value: Any, target_type: Any) -> Any: + """Convert datetime and date values.""" + if target_type in (datetime, date) and isinstance(value, str): + return target_type.fromisoformat(value) + return None + + def _convert_enum(self, value: Any, target_type: Any) -> Any: + """Convert enum values.""" + if isinstance(target_type, type) and issubclass(target_type, Enum): + return target_type(value) + return None + + def _convert_sequence(self, value: Any, target_type: Any, field_name: Optional[str]) -> Any: + """Convert sequence values.""" + origin = get_origin(target_type) + if not (origin is not None and isinstance(origin, type) and issubclass(origin, collections.abc.Sequence)): + return None + + if isinstance(value, str): + return value + + args = get_args(target_type) + item_type = args[0] if args else Any + return [self._convert_value(item, item_type, field_name) for item in value] + + def _convert_dict(self, value: Any, target_type: Any, field_name: Optional[str]) -> Any: + """Convert dictionary values.""" + if get_origin(target_type) is not dict: + return None + + key_type, value_type = get_args(target_type) + return { + self._convert_value(k, key_type, field_name): self._convert_value(v, value_type, field_name) + for k, v in value.items() + } + + def _convert_dataclass(self, value: Any, target_type: Any) -> Any: + """Convert nested dataclass values.""" + if is_dataclass(target_type) and isinstance(value, 
dict): + # Cast target_type to Type[Any] to satisfy Mypy + return self.from_dict(value, cast(type[Any], target_type)) + return None + + def _convert_optional(self, value: Any, target_type: Any, field_name: Optional[str]) -> Any: + """Convert Optional/Union values.""" + if get_origin(target_type) is not Union: + return None + + args = get_args(target_type) + if value is None and type(None) in args: + return None + + actual_type = next((t for t in args if t is not type(None)), Any) + return self._convert_value(value, actual_type, field_name) + + def _convert_value(self, value: Any, target_type: Any, field_name: Optional[str] = None) -> Any: + """Convert a single value to the target type.""" + if value is None: + return None + + converted = ( + self._convert_optional(value, target_type, field_name) + or self._convert_datetime(value, target_type) + or self._convert_enum(value, target_type) + or self._convert_sequence(value, target_type, field_name) + or self._convert_dict(value, target_type, field_name) + or self._convert_dataclass(value, target_type) + or value + ) + return converted + + def from_dict(self, data: Any, target_class: type[Any]) -> Any: + """Convert a dictionary to a dataclass instance.""" + if data is None: + return None + + if not is_dataclass(target_class): + return self._convert_value(data, target_class, None) + + if not isinstance(data, dict): + raise TypeError(f"Expected dict but got {type(data)}") + + class_fields = {field.name: field for field in fields(target_class)} + init_kwargs = {} + + for key, value in data.items(): + field_name = camel_to_snake(key) + if field_name not in class_fields: + continue + + field = class_fields[field_name] + field_type = self.type_resolver.resolve_type(field.type) + + try: + init_kwargs[field_name] = self._convert_value(value, field_type, field_name) + except Exception as e: + raise TypeError(f"Error converting field {key}: {str(e)}") + + missing_required = [ + name + for name, field in class_fields.items() + if name not in init_kwargs and field.default is MISSING and field.default_factory is MISSING + ] + + if missing_required: + raise TypeError(f"Missing required fields: {', '.join(missing_required)}") + + return target_class(**init_kwargs) + + def to_dict(self, obj: Any) -> Any: + """Convert a dataclass instance to a dictionary.""" + if obj is None: + return None + + if isinstance(obj, (str, int, float, bool)): + return obj + + if isinstance(obj, Enum): + return obj.value + + if isinstance(obj, (datetime, date)): + return obj.isoformat() + + if isinstance(obj, (list, tuple)): + return [self.to_dict(item) for item in obj] + + if isinstance(obj, dict): + return {snake_to_camel(str(key)): self.to_dict(value) for key, value in obj.items()} + + if is_dataclass(obj): + return { + snake_to_camel(field.name): self.to_dict(getattr(obj, field.name)) + for field in fields(obj) + if getattr(obj, field.name) is not None + } + + return str(obj) diff --git a/python/tests/conftest.py b/python/tests/conftest.py new file mode 100644 index 00000000..e69de29b diff --git a/python/tests/test_json_converter.py b/python/tests/test_json_converter.py new file mode 100644 index 00000000..24d6af0a --- /dev/null +++ b/python/tests/test_json_converter.py @@ -0,0 +1,208 @@ +import sys +from collections.abc import Sequence +from dataclasses import dataclass +from datetime import date, datetime +from enum import Enum +from typing import Optional + +import pytest + +from cucumber_messages.json_converter import JsonDataclassConverter, camel_to_snake, snake_to_camel + + 
+class SimpleEnum(Enum): + VALUE1 = "value1" + VALUE2 = "value2" + + +@dataclass +class SimpleModel: + string_field: str + int_field: int + float_field: float + bool_field: bool + enum_field: SimpleEnum + optional_field: Optional[str] = None + + +@dataclass +class NestedModel: + name: str + simple: SimpleModel + optional_simple: Optional["SimpleModel"] = None + + +@dataclass +class CollectionsModel: + sequence_field: Sequence[str] + list_field: list[SimpleModel] + dict_field: dict[str, SimpleModel] + optional_sequence: Optional[Sequence["str"]] = None + + +@dataclass +class DateTimeModel: + datetime_field: datetime + date_field: date + optional_datetime: Optional["datetime"] = None + + +@pytest.fixture +def serializer(): + return JsonDataclassConverter(module_scope=sys.modules[__name__]) + + +def test_optional_field_types(serializer): + # Testing deserialization with non-quoted and quoted types + data = { + "name": "test", + "simple": { + "stringField": "nested", + "intField": 42, + "floatField": 3.14, + "boolField": True, + "enumField": "value1", + }, + "optionalSimple": { + "stringField": "optional", + "intField": 99, + "floatField": 2.71, + "boolField": False, + "enumField": "value2", + }, + } + + model = serializer.from_dict(data, NestedModel) + + # Verify main fields + assert model.name == "test" + assert model.simple.string_field == "nested" + assert model.simple.int_field == 42 + + # Verify optional fields are correctly deserialized + assert model.optional_simple is not None + assert model.optional_simple.string_field == "optional" + assert model.optional_simple.int_field == 99 + + # Verify types explicitly + assert isinstance(model.optional_simple, SimpleModel) + assert isinstance(model.optional_simple.string_field, str) + assert isinstance(model.optional_simple.int_field, int) + assert isinstance(model.optional_simple.float_field, float) + assert isinstance(model.optional_simple.bool_field, bool) + assert isinstance(model.optional_simple.enum_field, SimpleEnum) + + +def test_collections_with_optional_field_types(serializer): + data = { + "sequenceField": ["a", "b", "c"], + "listField": [ + { + "stringField": "item", + "intField": 42, + "floatField": 3.14, + "boolField": True, + "enumField": "value1", + } + ], + "dictField": { + "key": { + "stringField": "dict_item", + "intField": 84, + "floatField": 6.28, + "boolField": False, + "enumField": "value2", + } + }, + "optionalSequence": ["x", "y", "z"], + } + + model = serializer.from_dict(data, CollectionsModel) + + # Verify main fields + assert list(model.sequence_field) == ["a", "b", "c"] + assert len(model.list_field) == 1 + assert model.list_field[0].string_field == "item" + assert model.dict_field["key"].string_field == "dict_item" + + # Verify optional fields + assert model.optional_sequence is not None + assert list(model.optional_sequence) == ["x", "y", "z"] + + # Verify types explicitly + assert isinstance(model.optional_sequence, Sequence) + assert all(isinstance(item, str) for item in model.optional_sequence) + assert isinstance(model.list_field[0], SimpleModel) + assert isinstance(model.dict_field["key"], SimpleModel) + + +def test_datetime_with_optional_field_types(serializer): + data = { + "datetimeField": "2024-01-01T12:00:00", + "dateField": "2024-01-01", + "optionalDatetime": "2024-01-01T13:00:00", + } + + model = serializer.from_dict(data, DateTimeModel) + + # Verify fields + assert model.datetime_field == datetime(2024, 1, 1, 12, 0) + assert model.date_field == date(2024, 1, 1) + assert model.optional_datetime == 
datetime(2024, 1, 1, 13, 0) + + # Verify types explicitly + assert isinstance(model.datetime_field, datetime) + assert isinstance(model.date_field, date) + assert isinstance(model.optional_datetime, datetime) + + +def test_optional_field_absent(serializer): + data = { + "name": "test", + "simple": { + "stringField": "nested", + "intField": 42, + "floatField": 3.14, + "boolField": True, + "enumField": "value1", + }, + } + + model = serializer.from_dict(data, NestedModel) + + # Verify optional fields are None when absent + assert model.optional_simple is None + + # Verify types explicitly + assert isinstance(model.simple, SimpleModel) + assert model.optional_simple is None + + +@pytest.mark.parametrize( + "input_str, expected", + [ + ("test", "test"), + ("test_test", "testTest"), + ("Test_TeSt", "TestTest"), + ("", ""), + ("test123test4_5_6_test", "test123test456Test"), + ("test-test", "test-test"), + ], +) +def test_camelize(input_str, expected): + assert snake_to_camel(input_str) == expected + + +@pytest.mark.parametrize( + "expected, input_str", + [ + ("test", "test"), + ("test_test", "testTest"), + ("test_test", "TestTest"), + ("", ""), + ("test123test456_test", "test123test456Test"), + ("test-test", "test-test"), + ], +) +def test_snaking(input_str, expected): + assert camel_to_snake(input_str) == expected diff --git a/python/tests/test_messages.py b/python/tests/test_messages.py new file mode 100644 index 00000000..63190b7f --- /dev/null +++ b/python/tests/test_messages.py @@ -0,0 +1,181 @@ +import pytest + +from cucumber_messages import Attachment, AttachmentContentEncoding, Envelope, SourceMediaType +from cucumber_messages import TestStepResultStatus as TTestStepResultStatus +from cucumber_messages import message_converter as default_converter + + +@pytest.fixture +def converter(): + return default_converter + + +def test_basic_attachment_serialization(converter): + data = { + "body": "some body", + "contentEncoding": "IDENTITY", + "mediaType": "text/plain", + "fileName": "myfile.txt", + } + + attachment = converter.from_dict(data, Attachment) + assert attachment.body == "some body" + assert attachment.content_encoding == AttachmentContentEncoding.identity + assert attachment.media_type == "text/plain" + assert attachment.file_name == "myfile.txt" + + # Round-trip serialization + serialized = converter.to_dict(attachment) + assert serialized == data + + +def test_envelope_with_attachment(converter): + data = { + "attachment": { + "body": "some body", + "contentEncoding": "BASE64", + "mediaType": "text/x.cucumber.gherkin+plain", + } + } + + envelope = converter.from_dict(data, Envelope) + assert envelope.attachment is not None + assert envelope.attachment.body == "some body" + assert envelope.attachment.content_encoding == AttachmentContentEncoding.base64 + assert envelope.attachment.media_type == "text/x.cucumber.gherkin+plain" + + # Round-trip serialization + serialized = converter.to_dict(envelope) + assert serialized == data + + +def test_envelope_with_source(converter): + data = { + "source": { + "data": "Feature: Sample\nScenario: Test\n", + "mediaType": "text/x.cucumber.gherkin+plain", + "uri": "features/sample.feature", + } + } + + envelope = converter.from_dict(data, Envelope) + assert envelope.source is not None + assert envelope.source.data == "Feature: Sample\nScenario: Test\n" + assert envelope.source.media_type == SourceMediaType.text_x_cucumber_gherkin_plain + assert envelope.source.uri == "features/sample.feature" + + serialized = converter.to_dict(envelope) + assert 
serialized == data + + +def test_test_run_finished_with_optional_fields(converter): + data = { + "testRunFinished": { + "success": True, + "timestamp": {"seconds": 1700000000, "nanos": 123456789}, + # exception and message are omitted, should be None after deserialization + } + } + + envelope = converter.from_dict(data, Envelope) + assert envelope.test_run_finished is not None + assert envelope.test_run_finished.success is True + assert envelope.test_run_finished.timestamp.seconds == 1700000000 + assert envelope.test_run_finished.timestamp.nanos == 123456789 + assert envelope.test_run_finished.exception is None + assert envelope.test_run_finished.message is None + + # Round-trip serialization + serialized = converter.to_dict(envelope) + assert serialized == data + + +def test_test_case_finished(converter): + data = { + "testCaseFinished": { + "testCaseStartedId": "some_test_case_started_id", + "timestamp": {"seconds": 1600000000, "nanos": 500}, + "willBeRetried": False, + } + } + + envelope = converter.from_dict(data, Envelope) + assert envelope.test_case_finished is not None + assert envelope.test_case_finished.test_case_started_id == "some_test_case_started_id" + assert envelope.test_case_finished.timestamp.seconds == 1600000000 + assert envelope.test_case_finished.timestamp.nanos == 500 + assert envelope.test_case_finished.will_be_retried is False + + serialized = converter.to_dict(envelope) + assert serialized == data + + +def test_exception_serialization(converter): + data = { + "testRunFinished": { + "success": False, + "timestamp": {"seconds": 1700000001, "nanos": 1000}, + "exception": { + "type": "AssertionError", + "message": "Expected 'X' but got 'Y'", + "stackTrace": "Traceback (most recent call last): ...", + }, + } + } + + envelope = converter.from_dict(data, Envelope) + assert envelope.test_run_finished is not None + exc = envelope.test_run_finished.exception + assert exc is not None + assert exc.type == "AssertionError" + assert exc.message == "Expected 'X' but got 'Y'" + assert exc.stack_trace.startswith("Traceback (most recent call last)") + + serialized = converter.to_dict(envelope) + assert serialized == data + + +def test_test_step_result(converter): + data = { + "testStepFinished": { + "testCaseStartedId": "tcs_id_123", + "testStepId": "ts_id_456", + "testStepResult": { + "duration": {"seconds": 3, "nanos": 500000000}, + "status": "PASSED", + "message": "Step executed successfully", + }, + "timestamp": {"seconds": 1700000020, "nanos": 0}, + } + } + + envelope = converter.from_dict(data, Envelope) + assert envelope.test_step_finished is not None + result = envelope.test_step_finished.test_step_result + assert result.status == TTestStepResultStatus.passed + assert result.duration.seconds == 3 + assert result.duration.nanos == 500000000 + assert result.message == "Step executed successfully" + + serialized = converter.to_dict(envelope) + assert serialized == data + + +def test_missing_optional_fields(converter): + # No optional fields set, serializer should handle defaults + data = { + "attachment": { + "body": "no optional fields", + "contentEncoding": "IDENTITY", + "mediaType": "text/plain", + } + } + + envelope = converter.from_dict(data, Envelope) + assert envelope.attachment is not None + assert envelope.attachment.file_name is None + assert envelope.attachment.source is None + assert envelope.attachment.url is None + + serialized = converter.to_dict(envelope) + assert serialized == data diff --git a/python/tests/test_model_load.py b/python/tests/test_model_load.py 
new file mode 100644 index 00000000..5a801e02 --- /dev/null +++ b/python/tests/test_model_load.py @@ -0,0 +1,61 @@ +import json +import re +from pathlib import Path + +from git import Repo +from packaging import version +from pytest import fixture + +from cucumber_messages import Envelope, message_converter + + +@fixture +def compatibility_kit_repo(tmpdir): + repo_path = Path(tmpdir) / "compatibility-kit" + repo = Repo.clone_from( + "https://github.com/cucumber/compatibility-kit.git", + str(repo_path), + branch="main", + ) + repo_tags = list( + filter( + lambda tag: tag is not None, + map(lambda tag: getattr(tag.tag, "tag", None), repo.tags), + ) + ) + + version_pattern = re.compile(r"((.*/)?)v(\d+\.\d+\.\d+)") + last_version = sorted( + map( + version.parse, + map( + lambda match: match.groups()[-1], + filter( + lambda match: match is not None, + map(lambda tag: re.match(version_pattern, tag), repo_tags), + ), + ), + ) + )[-1] + + last_version_tag = next(filter(lambda tag: re.search(re.escape(str(last_version)), tag), repo_tags)) + + repo.git.checkout(last_version_tag) + + return repo_path + + +# Analog of "ruby/spec/cucumber/messages/acceptance_spec.rb" test +def test_simple_load_model(compatibility_kit_repo): + for ast_path in (compatibility_kit_repo / "devkit" / "samples").rglob("*.ndjson"): + print(f"Checking ${ast_path}") + with ast_path.open(mode="r") as ast_file: + for ast_line in ast_file: + model_datum = json.loads(ast_line) + model = message_converter.from_dict(model_datum, Envelope) + + assert isinstance(model, Envelope) + + dumped_ast_datum = message_converter.to_dict(model) + + assert model_datum == dumped_ast_datum
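
Usage sketch (not part of the patch): based on the public API this change introduces — the `message_converter` instance exported from `cucumber_messages/__init__.py` and the generated `Envelope` dataclass — deserializing a message and round-tripping it back to a dict would look roughly like the following. The file name `messages.ndjson` is illustrative only; the behavior shown (camelCase keys mapping to snake_case fields, unset fields omitted on serialization) is taken from `tests/test_messages.py` and `tests/test_model_load.py` above.

    # Usage sketch, assuming the package is installed as "cucumber-messages".
    import json

    from cucumber_messages import Attachment, AttachmentContentEncoding, Envelope, message_converter

    # Deserialize a single envelope from a JSON object; camelCase keys map to snake_case fields.
    data = {"attachment": {"body": "some body", "contentEncoding": "IDENTITY", "mediaType": "text/plain"}}
    envelope = message_converter.from_dict(data, Envelope)
    assert isinstance(envelope.attachment, Attachment)
    assert envelope.attachment.content_encoding == AttachmentContentEncoding.identity

    # to_dict camelCases field names and drops fields left as None, so this round trip is lossless.
    assert message_converter.to_dict(envelope) == data

    # Reading an NDJSON message stream, one Envelope per line (as in tests/test_model_load.py).
    # "messages.ndjson" is a hypothetical path.
    with open("messages.ndjson") as stream:
        envelopes = [message_converter.from_dict(json.loads(line), Envelope) for line in stream]
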