Skip to content

Commit

Permalink
Improve the detection of schema changes and add tests (#715)
Browse files Browse the repository at this point in the history
* Start to generalize schema evolution checking

* Add basic test setup for running detection script tests

* Make the checking script exit with non-zero for errors

* Add detection for vector member additions

* Add more tests for detecting changes to VectorMembers

* Add (failing) tests related to relation schema changes

* Add detection of relation schema changes

* Remove unnecessary named variable

* Simplify logic and re-use messages from SchemaChanges

* Harmonize messages with the YAML grammar

* Change import order to make pylint happy
  • Loading branch information
tmadlener authored Dec 17, 2024
1 parent f6662ad commit ec61a51
Show file tree
Hide file tree
Showing 28 changed files with 410 additions and 9 deletions.
120 changes: 114 additions & 6 deletions python/podio_schema_evolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
Provides infrastructure for analyzing schema definitions for schema evolution
"""

import sys
import yaml

from podio_gen.podio_config_reader import PodioConfigReader
Expand Down Expand Up @@ -123,6 +124,62 @@ def __init__(self, name, member_name_old, member_name_new):
)


class AddedVectorMember(SchemaChange):
"""Class representing an added VectorMember"""

def __init__(self, member, datatype):
self.member = member
self.klassname = datatype
super().__init__(f"'{self.klassname}' has added a VectorMember '{self.member}'")


class DroppedVectorMember(SchemaChange):
"""Class representing a dropped VectorMember"""

def __init__(self, member, datatype):
self.member = member
self.klassname = datatype
super().__init__(f"'{self.klassname}' has a dropped VectorMember '{self.member.name}")


class AddedSingleRelation(SchemaChange):
"""Class representing an added OneToOneRelation"""

def __init__(self, member, datatype):
self.member = member
self.klassname = datatype
super().__init__(f"'{self.klassname}' has added a OneToOneRelation '{self.member.name}'")


class DroppedSingleRelation(SchemaChange):
"""Class representing a dropped OneToOneRelation"""

def __init__(self, member, datatype):
self.member = member
self.klassname = datatype
super().__init__(f"'{self.klassname}' has dropped a OneToOneRelation '{self.member.name}'")


class AddedMultiRelation(SchemaChange):
"""Class representing an added OneToManyRelation"""

def __init__(self, member, datatype):
self.member = member
self.klassname = datatype
super().__init__(f"'{self.klassname}' has added a OneToManyRelation '{self.member.name}'")


class DroppedMultiRelation(SchemaChange):
"""Class representing a dropped OneToManyRelation"""

def __init__(self, member, datatype):
self.member = member
self.klassname = datatype
super().__init__(
f"'{self.klassname}' has dropped a OneToManyRelation '{self.member.name}'"
)


class RootIoRule:
"""A placeholder IORule class"""

Expand Down Expand Up @@ -164,6 +221,15 @@ class DataModelComparator:
Compares two datamodels and extracts required schema evolution
"""

unsupported_changes = (
AddedVectorMember,
DroppedVectorMember,
AddedSingleRelation,
DroppedSingleRelation,
AddedMultiRelation,
DroppedMultiRelation,
)

def __init__(self, yamlfile_new, yamlfile_old, evolution_file=None) -> None:
self.yamlfile_new = yamlfile_new
self.yamlfile_old = yamlfile_old
Expand Down Expand Up @@ -205,7 +271,7 @@ def _compare_components(self) -> None:
]
)

self._compare_definitions(
self._compare_members(
kept_components,
self.datamodel_new.components,
self.datamodel_old.components,
Expand All @@ -226,14 +292,49 @@ def _compare_datatypes(self) -> None:
[DroppedDatatype(self.datamodel_old.datatypes[name], name) for name in dropped_types]
)

self._compare_definitions(
self._compare_members(
kept_types,
self.datamodel_new.datatypes,
self.datamodel_old.datatypes,
"Members",
)

def _compare_definitions(self, definitions, first, second, category) -> None:
self._compare_members(
kept_types,
self.datamodel_new.datatypes,
self.datamodel_old.datatypes,
"VectorMembers",
AddedVectorMember,
DroppedVectorMember,
)

self._compare_members(
kept_types,
self.datamodel_new.datatypes,
self.datamodel_old.datatypes,
"OneToOneRelations",
AddedSingleRelation,
DroppedSingleRelation,
)

self._compare_members(
kept_types,
self.datamodel_new.datatypes,
self.datamodel_old.datatypes,
"OneToManyRelations",
AddedMultiRelation,
DroppedMultiRelation,
)

def _compare_members(
self,
definitions,
first,
second,
category,
added_change=AddedMember,
dropped_change=DroppedMember,
) -> None:
"""compare member definitions in old and new datamodel"""
for name in definitions:
# we are only interested in members not the extracode
Expand All @@ -244,10 +345,10 @@ def _compare_definitions(self, definitions, first, second, category) -> None:
)
# Make findings known globally
self.detected_schema_changes.extend(
[AddedMember(members1[member], name) for member in added_members]
[added_change(members1[member], name) for member in added_members]
)
self.detected_schema_changes.extend(
[DroppedMember(members2[member], name) for member in dropped_members]
[dropped_change(members2[member], name) for member in dropped_members]
)

# now let's compare old and new for the kept members
Expand Down Expand Up @@ -327,6 +428,9 @@ def heuristics(self):
added_members = [change for change in schema_changes if isinstance(change, AddedMember)]
self.heuristics_members(added_members, dropped_members, schema_changes)

for change in (c for c in schema_changes if isinstance(c, self.unsupported_changes)):
self.errors.append(f"Unsupported schema change: {change}")

# are the member changes actually supported/supportable?
changed_members = [
change for change in schema_changes if isinstance(change, ChangedMember)
Expand Down Expand Up @@ -415,6 +519,9 @@ def print_comparison(self):
print("ERRORS:")
for error in self.errors:
print(f" - {error}")
return False

return True

def read(self) -> None:
"""read datamodels from yaml files"""
Expand Down Expand Up @@ -474,5 +581,6 @@ def read_evolution_file(self) -> None:
comparator = DataModelComparator(args.new, args.old, evolution_file=args.evo)
comparator.read()
comparator.compare()
comparator.print_comparison()
if not comparator.print_comparison():
sys.exit(1)
# print(comparator.get_changed_schemata(schema_filter=root_filter))
4 changes: 3 additions & 1 deletion tests/schema_evolution/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ add_test(
${CMAKE_CURRENT_SOURCE_DIR}/datalayout_notpossible.yaml
${PROJECT_SOURCE_DIR}/tests/datalayout.yaml
)
set_property(TEST schema-evolution-script-with-failure PROPERTY WILL_FAIL)
set_property(TEST schema-evolution-script-with-failure PROPERTY WILL_FAIL true)

add_subdirectory(detection)

add_subdirectory(root_io)
32 changes: 32 additions & 0 deletions tests/schema_evolution/detection/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
set(should_fail_cases
vector_members:add_new
vector_members:add_additional
vector_members:rm_one
relations:new_single_relation
relations:rm_single_relation
relations:new_multi_relation
relations:rm_multi_relation
relations:mv_single_to_multi
relations:mv_multi_to_single
)

set(should_succeed_cases
members:float_to_double
)

set(should_warn_cases
members:rename
)

foreach(test_case IN LISTS should_fail_cases should_succeed_cases should_warn_cases)
add_test(NAME schema_evol:detection:${test_case} COMMAND ${CMAKE_CURRENT_LIST_DIR}/run_test_case.sh ${test_case})
PODIO_SET_TEST_ENV(schema_evol:detection:${test_case})
endforeach()

foreach(test_case IN LISTS should_fail_cases)
set_property(TEST schema_evol:detection:${test_case} PROPERTY WILL_FAIL true)
endforeach()

foreach(test_case IN LISTS should_warn_cases)
set_property(TEST schema_evol:detection:${test_case} PROPERTY PASS_REGULAR_EXPRESSION "Warnings:")
endforeach()
27 changes: 27 additions & 0 deletions tests/schema_evolution/detection/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# *Detection* tests for `podio_schema_evolution.py`

This folder contains small and targetted test cases to ensure that the
`podio_schema_evolution.py` script reliably detects schema changes that are not
trivial. These test cases only deal with detecting these occurences! Whether the
detected schema evolution steps are supported by podio (yet) are not really
interesting here and they (will be) tested in another place.

## Setup of the tests

In order to allow for some minimal form of automation and to avoid too much
boilerplate the test cases are grouped into categories. Each category then has
several *unit test like* setups where each test covers exactly one schema
change. Each subfolder in this directory represents a category. In each
subfolder there are for each test case (i.e. schema change) exactly two yaml
files with the (minimal) schemas that have an example of the change. To allow
for test automation these yaml files need to follow the naming convention
`dm_<test-case-name>_{old,new}.yaml`, where the `old` yaml file needs to have a
lower `schema_version` than the `new` yaml file.

The `run_test_case.sh` script takes one argument in the form of
`<category-name>:<test-case-name>`. It constructs the necessary file names from
this input and then runs the `podio_schema_evolution.py` script on them.

Finally, in the `CMakeLists.txt` file here, it is simply necessary to add new
test cases to the `should_fail_cases`, `should_succeed_cases` or
`should_warn_cases` lists and they will be automatically picked up.
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
schema_version: 2

datatypes:
TypeWithFloat:
Description: "A type with a double that was a float"
Author: "Thomas Madlener"
Members:
- double f // a float to become a double
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
schema_version: 1

datatypes:
TypeWithFloat:
Description: "A type with a float that will become a double"
Author: "Thomas Madlener"
Members:
- float f // a float to become a double
8 changes: 8 additions & 0 deletions tests/schema_evolution/detection/members/dm_rename_new.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
schema_version: 3

datatypes:
TypeWithRenamedMember:
Description: "A type with a renamed member"
Author: "Thomas Madlener"
Members:
- int newName // the new name
8 changes: 8 additions & 0 deletions tests/schema_evolution/detection/members/dm_rename_old.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
schema_version: 1

datatypes:
TypeWithRenamedMember:
Description: "A type with a member that will be renamed"
Author: "Thomas Madlener"
Members:
- int oldName // the old name
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
schema_version: 2

datatypes:
RelatedType:
Description: "A type we use in the relation"
Author: "Thomas Madlener"

DataTypeWithRelationMigration:
Description: "Type for testing the move from a OneToMany to a OneToOne relation"
Author: "Thomas Madlener"
OneToOneRelations:
- RelatedType rel // a relation
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
schema_version: 1

datatypes:
RelatedType:
Description: "A type we use in the relation"
Author: "Thomas Madlener"

DataTypeWithRelationMigration:
Description: "Type for testing the move from a OneToMany to a OneToOne relation"
Author: "Thomas Madlener"
OneToManyRelations:
- RelatedType rel // a relation
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
schema_version: 2

datatypes:
RelatedType:
Description: "A type we use in the relation"
Author: "Thomas Madlener"

DataTypeWithRelationMigration:
Description: "Type for testing the move from a OneToOne to a OneToMany relation"
Author: "Thomas Madlener"
OneToManyRelations:
- RelatedType rel // a relation
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
schema_version: 1

datatypes:
RelatedType:
Description: "A type we use in the relation"
Author: "Thomas Madlener"

DataTypeWithRelationMigration:
Description: "Type for testing the move from a OneToOne to a OneToMany relation"
Author: "Thomas Madlener"
OneToOneRelations:
- RelatedType rel // a relation
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
schema_version: 2

datatypes:
RelatedType:
Description: "A type we use in the relation"
Author: "Thomas Madlener"

DataTypeWithNewMultiRelation:
Description: "Type for testing the addition of new OneToManyRelations"
Author: "Thomas Madlener"
OneToManyRelations:
- RelatedType rel // a new relation
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
schema_version: 1

datatypes:
RelatedType:
Description: "A type we use in the relation"
Author: "Thomas Madlener"

DataTypeWithNewMultiRelation:
Description: "Type for testing the addition of new OneToManyRelations"
Author: "Thomas Madlener"
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
schema_version: 2

datatypes:
RelatedType:
Description: "A type we use in the relation"
Author: "Thomas Madlener"

DataTypeWithNewSingleRelation:
Description: "Type for testing the addition of new OneToOneRelations"
Author: "Thomas Madlener"
OneToOneRelations:
- RelatedType rel // a new relation
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
schema_version: 1

datatypes:
RelatedType:
Description: "A type we use in the relation"
Author: "Thomas Madlener"

DataTypeWithNewSingleRelation:
Description: "Type for testing the addition of new OneToOneRelations"
Author: "Thomas Madlener"
Loading

0 comments on commit ec61a51

Please sign in to comment.