From c4fbdb868e3f3f8a6f19093be3306d35eb538549 Mon Sep 17 00:00:00 2001 From: Matthias Schnepf Date: Mon, 13 Sep 2021 10:34:53 +0200 Subject: [PATCH 1/3] Remove granularity With `granularity` new drones are requested when `demand>supply+granularity`. Remove granularity in Standardiser to enable the creation of new drones when `demand>supply`. I think that is more intuitive and solves problems as discussed in [Fix stepwise controller documentation](https://github.com/MatterMiners/cobald/pull/93). --- tardis/resources/poolfactory.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tardis/resources/poolfactory.py b/tardis/resources/poolfactory.py index e4f0f2de..06946a87 100644 --- a/tardis/resources/poolfactory.py +++ b/tardis/resources/poolfactory.py @@ -80,7 +80,6 @@ def create_composite_pool(configuration: str = None) -> WeightedComposite: Standardiser( FactoryPool(*check_pointed_drones, factory=drone_factory), minimum=cpu_cores, - granularity=cpu_cores, ), name=f"{site.name.lower()}_{machine_type.lower()}", ) From ba32d74ef20f64fc8f4f548424336455dd98d1b9 Mon Sep 17 00:00:00 2001 From: "Matthias J. Schnepf" Date: Fri, 17 Sep 2021 09:52:56 +0200 Subject: [PATCH 2/3] Fix unittest --- tests/resources_t/test_poolfactory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/resources_t/test_poolfactory.py b/tests/resources_t/test_poolfactory.py index 8bcfe6a7..f9cd10ea 100644 --- a/tests/resources_t/test_poolfactory.py +++ b/tests/resources_t/test_poolfactory.py @@ -100,7 +100,7 @@ def test_create_composite( self.assertEqual( mock_standardiser.mock_calls, [ - call(mock_factory_pool(), minimum=cpu_cores, granularity=cpu_cores), + call(mock_factory_pool(), minimum=cpu_cores), call(mock_weighted_composite(), maximum=self.config.Sites[0].quota), ], ) From 97282482346f334dbf6c13b99ef417e1ba4a687c Mon Sep 17 00:00:00 2001 From: "Matthias J. Schnepf" Date: Fri, 17 Sep 2021 14:30:33 +0200 Subject: [PATCH 3/3] Fix flake8 B904 warnings --- tardis/adapters/sites/cloudstack.py | 6 +++--- tardis/adapters/sites/htcondor.py | 2 +- tardis/adapters/sites/moab.py | 6 +++--- tardis/adapters/sites/openstack.py | 12 ++++++------ tardis/adapters/sites/slurm.py | 4 ++-- tardis/resources/dronestates.py | 8 ++++---- tardis/utilities/attributedict.py | 4 ++-- 7 files changed, 21 insertions(+), 21 deletions(-) diff --git a/tardis/adapters/sites/cloudstack.py b/tardis/adapters/sites/cloudstack.py index d822775b..2d46c5a6 100644 --- a/tardis/adapters/sites/cloudstack.py +++ b/tardis/adapters/sites/cloudstack.py @@ -94,9 +94,9 @@ def handle_exceptions(self): yield except asyncio.TimeoutError as te: raise TardisTimeout from te - except ClientConnectionError: + except ClientConnectionError as err: logger.warning("Connection reset error") - raise TardisResourceStatusUpdateFailed + raise TardisResourceStatusUpdateFailed from err except CloudStackClientException as ce: log_msg = ( f"Error code: {ce.error_code}, error text: {ce.error_text}, " @@ -104,7 +104,7 @@ def handle_exceptions(self): ) if ce.error_code == 535: logger.warning(f"Quota exceeded: {log_msg}") - raise TardisQuotaExceeded + raise TardisQuotaExceeded from ce elif ce.error_code == 500: if "timed out" in ce.response["message"]: logger.warning(f"Timed out: {log_msg}") diff --git a/tardis/adapters/sites/htcondor.py b/tardis/adapters/sites/htcondor.py index 604ed167..1ed7d350 100644 --- a/tardis/adapters/sites/htcondor.py +++ b/tardis/adapters/sites/htcondor.py @@ -140,7 +140,7 @@ async def resource_status( if ( self._htcondor_queue.last_update - resource_attributes.created ).total_seconds() < 0: - raise TardisResourceStatusUpdateFailed + raise TardisResourceStatusUpdateFailed from None else: return AttributeDict(resource_status=ResourceStatus.Deleted) else: diff --git a/tardis/adapters/sites/moab.py b/tardis/adapters/sites/moab.py index fbf2f7dd..5b757251 100644 --- a/tardis/adapters/sites/moab.py +++ b/tardis/adapters/sites/moab.py @@ -157,11 +157,11 @@ async def resource_status( try: resource_uuid = resource_attributes.remote_resource_uuid resource_status = self._moab_status[str(resource_uuid)] - except KeyError: + except KeyError as err: if ( self._moab_status._last_update - resource_attributes.created ).total_seconds() < 0: - raise TardisResourceStatusUpdateFailed + raise TardisResourceStatusUpdateFailed from err else: resource_status = { "JobID": resource_attributes.remote_resource_uuid, @@ -234,7 +234,7 @@ def handle_exceptions(self): raise TardisTimeout from te except asyncssh.Error as exc: logger.warning("SSH connection failed: " + str(exc)) - raise TardisResourceStatusUpdateFailed + raise TardisResourceStatusUpdateFailed from exc except IndexError as ide: raise TardisResourceStatusUpdateFailed from ide except TardisResourceStatusUpdateFailed: diff --git a/tardis/adapters/sites/openstack.py b/tardis/adapters/sites/openstack.py index dc7caa32..389e0859 100644 --- a/tardis/adapters/sites/openstack.py +++ b/tardis/adapters/sites/openstack.py @@ -105,14 +105,14 @@ def handle_exceptions(self): raise TardisTimeout from te except AuthError as ae: raise TardisAuthError from ae - except ContentTypeError: + except ContentTypeError as cte: logger.warning("OpenStack: content Type Error") - raise TardisResourceStatusUpdateFailed - except ClientError: + raise TardisResourceStatusUpdateFailed from cte + except ClientError as ce: logger.warning("REST client error") - raise TardisDroneCrashed - except ClientConnectionError: + raise TardisDroneCrashed from ce + except ClientConnectionError as cde: logger.warning("Connection reset error") - raise TardisResourceStatusUpdateFailed + raise TardisResourceStatusUpdateFailed from cde except Exception as ex: raise TardisError from ex diff --git a/tardis/adapters/sites/slurm.py b/tardis/adapters/sites/slurm.py index 96154c2f..dc8c6b0a 100644 --- a/tardis/adapters/sites/slurm.py +++ b/tardis/adapters/sites/slurm.py @@ -143,7 +143,7 @@ async def resource_status( # In case the created timestamp is after last update timestamp of the # asynccachemap, no decision about the current state can be given, # since map is updated asynchronously. Just retry later on. - raise TardisResourceStatusUpdateFailed + raise TardisResourceStatusUpdateFailed from None else: resource_status = { "JobID": resource_attributes.remote_resource_uuid, @@ -209,7 +209,7 @@ def handle_exceptions(self): yield except CommandExecutionFailure as ex: logger.warning("Execute command failed: %s" % str(ex)) - raise TardisResourceStatusUpdateFailed + raise TardisResourceStatusUpdateFailed from ex except TardisResourceStatusUpdateFailed: raise except TimeoutError as te: diff --git a/tardis/resources/dronestates.py b/tardis/resources/dronestates.py index 6871ef9f..5c7f88b2 100644 --- a/tardis/resources/dronestates.py +++ b/tardis/resources/dronestates.py @@ -59,12 +59,12 @@ async def resource_status(state_transition, drone: "Drone", current_state: Type[ await drone.site_agent.resource_status(drone.resource_attributes) ) logger.debug(f"Resource attributes: {drone.resource_attributes}") - except (TardisAuthError, TardisTimeout, TardisResourceStatusUpdateFailed): + except (TardisAuthError, TardisTimeout, TardisResourceStatusUpdateFailed) as err: # Retry to get current state of the resource - raise StopProcessing(last_result=current_state()) - except TardisDroneCrashed: + raise StopProcessing(last_result=current_state()) from err + except TardisDroneCrashed as tdc: # Try to cleanup crashed resources - raise StopProcessing(last_result=CleanupState()) + raise StopProcessing(last_result=CleanupState()) from tdc else: return state_transition[drone.resource_attributes.resource_status]() diff --git a/tardis/utilities/attributedict.py b/tardis/utilities/attributedict.py index ed0f2a54..755e7eac 100644 --- a/tardis/utilities/attributedict.py +++ b/tardis/utilities/attributedict.py @@ -16,7 +16,7 @@ def __getattr__(self, item): except KeyError: raise AttributeError( f"{item} is not a valid attribute. Dict contains {str(self)}." - ) + ) from None def __setattr__(self, key, value): self[key] = value @@ -27,4 +27,4 @@ def __delattr__(self, item): except KeyError: raise AttributeError( f"{item} is not a valid attribute. Dict contains {str(self)}." - ) + ) from None