From 7c0e5c49dd565a481bb9d4aa73667a192faca6c5 Mon Sep 17 00:00:00 2001 From: Chris Chamberlain Date: Thu, 30 Jan 2025 14:27:03 +1300 Subject: [PATCH 1/3] made parent fault filter chainable; added support for symmetric difference operation; fixed some small docstring issues; updated changelog; --- CHANGELOG.md | 2 + solvis/filter/chainable_set_base.py | 2 + solvis/filter/parent_fault_id_filter.py | 56 ++++++++++++++------- solvis/filter/rupture_id_filter.py | 9 ++-- solvis/filter/subsection_id_filter.py | 8 +-- solvis/solution/typing.py | 1 + test/filter/test_filter_parent_fault_ids.py | 36 +++++++++++++ 7 files changed, 88 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aaf3c36..df2f664 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ - updated flake8 and applied many docstring fixes. - refactored participation methods into module/class. - use valid NSHM fault names in docstring examples. + - `FilterParentFaultIds` class is now chainable, like the other filters. ## Added - new filter package providing classes for filtering solutions @@ -31,6 +32,7 @@ - added participation methods to fault_system_solution - a simple rupture grouping algorithm (can this be a different type of filter??); - `pandera` library for dataframe model validations and better docs + - ChainableSet now supports set.symmetric_difference ## Removed - deprecated `solvis.solvis` functions removed. 
diff --git a/solvis/filter/chainable_set_base.py b/solvis/filter/chainable_set_base.py index 860b429..9e92619 100644 --- a/solvis/filter/chainable_set_base.py +++ b/solvis/filter/chainable_set_base.py @@ -37,6 +37,8 @@ def new_chainable_set( instance._chained_set = set.union(result, self.chained_set) if self.chained_set else result elif join_prior == SetOperationEnum.DIFFERENCE: instance._chained_set = set.difference(result, self.chained_set) if self.chained_set else result + elif join_prior == SetOperationEnum.SYMMETRIC_DIFFERENCE: + instance._chained_set = set.symmetric_difference(result, self.chained_set) if self.chained_set else result else: raise ValueError(f"Unsupported join type {join_prior}") # pragma: no cover return instance diff --git a/solvis/filter/parent_fault_id_filter.py b/solvis/filter/parent_fault_id_filter.py index 702d8f2..198e052 100644 --- a/solvis/filter/parent_fault_id_filter.py +++ b/solvis/filter/parent_fault_id_filter.py @@ -2,7 +2,7 @@ This module provides a class for filtering solution parent faults. Classes: - FilterParentFaultIds: a filter for parent faults, returning qualifying fault_ids. + FilterParentFaultIds: a chainable filter for parent faults, returning qualifying fault ids. ParentFaultMapping: a namedtuple representing id and name of a parent fault. 
Functions: @@ -11,20 +11,29 @@ Examples: ```py - >>> model = InversionSolution.from_archive(filename).model - >>> parent_fault_ids = FilterParentFaultIds(model)\ + >>> solution = InversionSolution.from_archive(filename) + >>> parent_fault_ids = FilterParentFaultIds(solution)\ .for_parent_fault_names(['Alpine: Jacksons to Kaniere', 'BooBoo']) + + >>> # chained with rupture id filter + >>> rupture_ids = FilterRuptureIds(solution)\ + .for_magnitude(min_mag=5.75, max_mag=6.25) + + >>> parent_fault_ids = FilterParentFaultIds(solution)\ + .for_parent_fault_names(['Alpine: Jacksons to Kaniere'])\ + .for_rupture_ids(rupture_ids) ``` TODO: - make FilterParentFaultIds chainable """ -from typing import Iterable, Iterator, NamedTuple, Set +from typing import Iterable, Iterator, NamedTuple, Set, Union import shapely.geometry -from ..solution.typing import InversionSolutionProtocol +from ..solution.typing import InversionSolutionProtocol, SetOperationEnum +from .chainable_set_base import ChainableSetBase class ParentFaultMapping(NamedTuple): @@ -71,7 +80,7 @@ def valid_parent_fault_names(solution, validate_names: Iterable[str]) -> Set[str return set(validate_names) -class FilterParentFaultIds: +class FilterParentFaultIds(ChainableSetBase): """A helper class to filter parent faults, returning qualifying fault_ids. Class methods all return sets to make it easy to combine filters with @@ -82,6 +91,7 @@ class FilterParentFaultIds: >>> solution = InversionSolution.from_archive(filename) >>> parent_fault_ids = FilterParentFaultIds(solution)\ .for_parent_fault_names(['Alpine: Jacksons to Kaniere']) + . ``` """ @@ -96,7 +106,7 @@ def __init__(self, solution: InversionSolutionProtocol): def for_named_faults(self, named_fault_names: Iterable[str]): raise NotImplementedError() - def all(self) -> Set[int]: + def all(self) -> ChainableSetBase: """Convenience method returning ids for all solution parent faults. 
NB the usual `join_prior` argument is not implemented as it doesn't seem useful here. @@ -105,16 +115,19 @@ def all(self) -> Set[int]: the parent_fault_ids. """ result = set(self._solution.solution_file.fault_sections['ParentID'].tolist()) - return result + return self.new_chainable_set(result, self._solution) - def for_parent_fault_names(self, parent_fault_names: Iterable[str]) -> Set[int]: + def for_parent_fault_names( + self, parent_fault_names: Iterable[str], join_prior: Union[SetOperationEnum, str] = 'intersection' + ) -> ChainableSetBase: """Find parent fault ids for the given parent_fault names. Args: parent_fault_names: A list of one or more `parent_fault` names. + join_prior: How to join this method's result with the prior chain (if any) (default = 'intersection'). Returns: - The fault_ids matching the filter. + A chainable set of fault_ids matching the filter. Raises: ValueError: If any `parent_fault_names` argument is not valid. @@ -123,34 +136,43 @@ def for_parent_fault_names(self, parent_fault_names: Iterable[str]) -> Set[int]: ids = df0[df0['ParentName'].isin(list(valid_parent_fault_names(self._solution, parent_fault_names)))][ 'ParentID' ].tolist() - return set([int(id) for id in ids]) + result = set([int(id) for id in ids]) + return self.new_chainable_set(result, self._solution, join_prior=join_prior) - def for_subsection_ids(self, fault_section_ids: Iterable[int]) -> Set[int]: + def for_subsection_ids( + self, fault_section_ids: Iterable[int], join_prior: Union[SetOperationEnum, str] = 'intersection' + ) -> ChainableSetBase: """Find parent fault ids that contain any of the given fault_section_ids. Args: fault_section_ids: A list of one or more fault_section ids. + join_prior: How to join this method's result with the prior chain (if any) (default = 'intersection'). Returns: - The fault_ids matching the filter. + A chainable set of fault_ids matching the filter. 
""" df0 = self._solution.solution_file.fault_sections ids = df0[df0['FaultID'].isin(list(fault_section_ids))]['ParentID'].unique().tolist() - return set([int(id) for id in ids]) + result = set([int(id) for id in ids]) + return self.new_chainable_set(result, self._solution, join_prior=join_prior) def for_polygon(self, polygon: shapely.geometry.Polygon, contained: bool = True): raise NotImplementedError() - def for_rupture_ids(self, rupture_ids: Iterable[int]) -> Set[int]: + def for_rupture_ids( + self, rupture_ids: Iterable[int], join_prior: Union[SetOperationEnum, str] = 'intersection' + ) -> ChainableSetBase: """Find parent_fault_ids for the given rupture_ids. Args: rupture_ids: A list of one or more rupture ids. + join_prior: How to join this method's result with the prior chain (if any) (default = 'intersection'). Returns: - The parent_fault_ids matching the filter. + A chainable set of parent fault_ids matching the filter. """ # df0 = self._solution.solution_file.rupture_sections df0 = self._solution.model.fault_sections_with_rupture_rates ids = df0[df0['Rupture Index'].isin(list(rupture_ids))].ParentID.unique().tolist() - return set([int(id) for id in ids]) + result = set([int(id) for id in ids]) + return self.new_chainable_set(result, self._solution, join_prior=join_prior) diff --git a/solvis/filter/rupture_id_filter.py b/solvis/filter/rupture_id_filter.py index 4505217..7d0d4bb 100644 --- a/solvis/filter/rupture_id_filter.py +++ b/solvis/filter/rupture_id_filter.py @@ -2,7 +2,7 @@ This module provides a class for filtering solution ruptures. Classes: - FilterRuptureIds: a filter for ruptures, returning qualifying rupture ids. + FilterRuptureIds: a chainable filter for ruptures, returning qualifying rupture ids. 
Examples: ```py @@ -10,19 +10,18 @@ >>> ham50 = solvis.circle_polygon(50000, -37.78, 175.28) # 50km radius around Hamilton >>> solution = solvis.InversionSolution.from_archive(filename) - >>> model = solution.model - >>> rupture_ids = FilterRuptureIds(model)\ + >>> rupture_ids = FilterRuptureIds(solution)\ .for_magnitude(min_mag=5.75, max_mag=6.25)\ .for_polygon(ham50) >>> # ruptures on any of faults A, B, with magnitude and rupture rate limits - >>> rupture_ids = FilterRuptureIds(model)\ + >>> rupture_ids = FilterRuptureIds(solution)\ >>> .for_parent_fault_names(['Alpine: Jacksons to Kaniere', 'Vernon 1' ])\ >>> .for_magnitude(7.0, 8.0)\ >>> .for_rupture_rate(1e-6, 1e-2) >>> # ruptures on fault A that do not involve fault B: - >>> rupture_ids = FilterRuptureIds(model)\ + >>> rupture_ids = FilterRuptureIds(solution)\ >>> .for_parent_fault_names(['Alpine: Jacksons to Kaniere'])\ >>> .for_parent_fault_names(['Vernon 1'], join_prior='difference') ``` diff --git a/solvis/filter/subsection_id_filter.py b/solvis/filter/subsection_id_filter.py index 8dbe795..db73714 100644 --- a/solvis/filter/subsection_id_filter.py +++ b/solvis/filter/subsection_id_filter.py @@ -2,18 +2,18 @@ This module provides a class for filtering solution fault sections (subsections). Classes: - FilterSubsectionIds: a filter for ruptures, returning qualifying rupture ids. + FilterSubsectionIds: a chainable filter for fault sections, returning qualifying fault section ids. 
Examples: ```py >>> ham50 = solvis.circle_polygon(50000, -37.78, 175.28) # 50km radius around Hamilton - >>> sol = solvis.InversionSolution.from_archive(filename) - >>> rupture_ids = FilterRuptureIds(sol)\ + >>> solution = solvis.InversionSolution.from_archive(filename) + >>> rupture_ids = FilterRuptureIds(solution)\ .for_magnitude(min_mag=5.75, max_mag=6.25)\ .for_polygon(ham50) - >>> subsection_ids = FilterSubsectionIds(sol)\ + >>> subsection_ids = FilterSubsectionIds(solution)\ >>> .for_rupture_ids(rupture_ids) ``` """ diff --git a/solvis/solution/typing.py b/solvis/solution/typing.py index 2196b8c..ec9babf 100644 --- a/solvis/solution/typing.py +++ b/solvis/solution/typing.py @@ -284,3 +284,4 @@ class SetOperationEnum(Enum): UNION = 1 INTERSECTION = 2 DIFFERENCE = 3 + SYMMETRIC_DIFFERENCE = 4 diff --git a/test/filter/test_filter_parent_fault_ids.py b/test/filter/test_filter_parent_fault_ids.py index e97228e..71e3f8a 100644 --- a/test/filter/test_filter_parent_fault_ids.py +++ b/test/filter/test_filter_parent_fault_ids.py @@ -75,3 +75,39 @@ def test_parent_faults_for_ruptures(filter_parent_fault_ids, filter_rupture_ids, # there will be more parent faults, given all those ruptures on the original parents assert filter_parent_fault_ids.for_rupture_ids(rupt_ids).issuperset(pids) + + +def test_parent_faults_filter_chaining(filter_parent_fault_ids, crustal_solution_fixture): + # filter_rupture_ids = FilterParentFaultIds(crustal_solution_fixture) + pnames = random.sample(crustal_solution_fixture.model.parent_fault_names, 2) + + together = filter_parent_fault_ids.for_parent_fault_names(pnames) + first = filter_parent_fault_ids.for_parent_fault_names(pnames[:1]) + second = filter_parent_fault_ids.for_parent_fault_names(pnames[1:]) + + assert together.difference(second) == first + assert together.difference(first) == second + + ## union + chained = filter_parent_fault_ids.for_parent_fault_names(pnames[:1]).for_parent_fault_names( + pnames[1:], join_prior='union' + ) + 
assert together == chained + + ## difference + diff = filter_parent_fault_ids.for_parent_fault_names(pnames).for_parent_fault_names( + pnames[1:], join_prior='difference' + ) + assert diff == second.difference(together) + + ## symmetric_difference + diff = filter_parent_fault_ids.for_parent_fault_names(pnames).for_parent_fault_names( + pnames[1:], join_prior='symmetric_difference' + ) + assert diff == second.symmetric_difference(together) + + ## intersection + intersect = filter_parent_fault_ids.for_parent_fault_names(pnames).for_parent_fault_names( + pnames[1:] + ) # default join_prior is `intersection` + assert intersect == second From 419d93f9545d6d73ba1fbc3da397bf2a9abdbb60 Mon Sep 17 00:00:00 2001 From: Chris Chamberlain Date: Thu, 30 Jan 2025 14:30:44 +1300 Subject: [PATCH 2/3] remove TODO comment; --- solvis/filter/parent_fault_id_filter.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/solvis/filter/parent_fault_id_filter.py b/solvis/filter/parent_fault_id_filter.py index 198e052..dbc0967 100644 --- a/solvis/filter/parent_fault_id_filter.py +++ b/solvis/filter/parent_fault_id_filter.py @@ -23,9 +23,6 @@ .for_parent_fault_names(['Alpine: Jacksons to Kaniere'])\ .for_rupture_ids(rupture_ids) ``` - -TODO: - - make FilterParentFaultIds chainable """ from typing import Iterable, Iterator, NamedTuple, Set, Union From 171973f32483b87614ae915461a44d6b273ebf9b Mon Sep 17 00:00:00 2001 From: Chris Chamberlain Date: Thu, 30 Jan 2025 14:49:44 +1300 Subject: [PATCH 3/3] fix for #57 (return int type for polygons); --- solvis/filter/rupture_id_filter.py | 2 +- test/filter/test_filter_rupture_ids.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/solvis/filter/rupture_id_filter.py b/solvis/filter/rupture_id_filter.py index 7d0d4bb..1928ce7 100644 --- a/solvis/filter/rupture_id_filter.py +++ b/solvis/filter/rupture_id_filter.py @@ -305,5 +305,5 @@ def for_polygon( df1 = self._solution.model.rupture_sections df2 = df1.join(df0, 'section', 
how='inner') - result = set(df2[index].unique()) + result = set(df2[index].tolist()) return self.new_chainable_set(result, self._solution, self._drop_zero_rates, join_prior=join_prior) diff --git a/test/filter/test_filter_rupture_ids.py b/test/filter/test_filter_rupture_ids.py index 457276f..d239d23 100644 --- a/test/filter/test_filter_rupture_ids.py +++ b/test/filter/test_filter_rupture_ids.py @@ -73,6 +73,13 @@ def test_ruptures_for_polygon_intersecting(crustal_solution_fixture, filter_rupt ).issubset(rupture_ids) +def test_ruptures_for_polygon_type(crustal_solution_fixture, filter_rupture_ids): + MRO = location_by_id('MRO') + poly = circle_polygon(1.5e5, MRO['latitude'], MRO['longitude']) # 150km circle around MRO + rids = filter_rupture_ids.for_polygon(poly) + assert isinstance(list(rids)[0], int) + + def test_ruptures_for_polygons_join_iterable(crustal_solution_fixture, filter_rupture_ids): WLG = location_by_id('WLG') MRO = location_by_id('MRO')