-
Notifications
You must be signed in to change notification settings - Fork 352
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Make job scheduler local to task process (#674)
* Make job scheduler local to task process
- Loading branch information
1 parent
baa386e
commit 903138a
Showing
23 changed files
with
808 additions
and
451 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
67 changes: 67 additions & 0 deletions
67
delfin/leader_election/distributor/failed_task_distributor.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
# Copyright 2021 The SODA Authors. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
|
||
import six | ||
from oslo_config import cfg | ||
from oslo_log import log | ||
|
||
from delfin import db | ||
from delfin.common.constants import TelemetryCollection | ||
from delfin.task_manager import metrics_rpcapi as task_rpcapi | ||
|
||
CONF = cfg.CONF | ||
LOG = log.getLogger(__name__) | ||
|
||
|
||
class FailedTaskDistributor(object):
    """Callable job that forwards failed-task work to executors over RPC.

    Each call first asks executors to drop the failed-task jobs whose
    database rows are marked deleted, then asks executors to schedule
    every failed task returned by the database.
    """

    def __init__(self, ctx):
        """Store the context and build the metrics RPC client.

        :param ctx: context passed to every database and RPC call.
        """
        # RPC client used to reach the executor recorded on each failed task.
        self.task_rpcapi = task_rpcapi.TaskAPI()
        self.ctx = ctx

    def __call__(self):
        """Run one cycle: remove deleted failed jobs, then assign the rest.

        Both phases are best-effort: an exception raised inside a phase is
        logged and not re-raised, and the assignment phase runs even when
        the removal phase failed.
        """
        try:
            # Remove jobs from scheduler when marked for delete
            filters = {'deleted': True}
            failed_tasks = db.failed_task_get_all(self.ctx, filters=filters)
            # Pass values as logger arguments so interpolation is deferred
            # until the record is actually emitted.
            LOG.debug("Total failed_tasks found deleted "
                      "in this cycle:%s", len(failed_tasks))
            for failed_task in failed_tasks:
                self.task_rpcapi.remove_failed_job(self.ctx, failed_task['id'],
                                                   failed_task['executor'])
        except Exception as e:
            LOG.error("Failed to remove periodic scheduling job , reason: %s.",
                      six.text_type(e))

        try:
            failed_tasks = db.failed_task_get_all(self.ctx)
            for failed_task in failed_tasks:
                # Todo Get executor for the job
                LOG.debug('Assigning failed task for id: %s',
                          failed_task['id'])
                self.task_rpcapi.assign_failed_job(self.ctx, failed_task['id'],
                                                   failed_task['executor'])

                LOG.info('Assigned failed task for id: %s ',
                         failed_task['id'])
        except Exception as e:
            LOG.error("Failed to schedule retry tasks for performance "
                      "collection, reason: %s", six.text_type(e))
        else:
            LOG.info("Schedule collection completed")

    @classmethod
    def job_interval(cls):
        """Return ``TelemetryCollection.FAILED_JOB_SCHEDULE_INTERVAL``."""
        return TelemetryCollection.FAILED_JOB_SCHEDULE_INTERVAL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
# Copyright 2021 The SODA Authors. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
|
||
import six | ||
from oslo_config import cfg | ||
from oslo_log import log | ||
|
||
from delfin import db | ||
from delfin.common.constants import TelemetryCollection | ||
from delfin.task_manager import metrics_rpcapi as task_rpcapi | ||
|
||
CONF = cfg.CONF | ||
LOG = log.getLogger(__name__) | ||
|
||
|
||
class TaskDistributor(object):
    """Callable job that hands collection tasks to executors over RPC."""

    def __init__(self, ctx):
        """Store the context, build the RPC client and reset all tasks.

        :param ctx: context passed to every database and RPC call.
        """
        self.ctx = ctx
        self.task_rpcapi = task_rpcapi.TaskAPI()

        # Reset last run time of tasks to restart scheduling and
        # start the failed task job
        for task in db.task_get_all(ctx):
            db.task_update(ctx, task['id'], {'last_run_time': None})

    def __call__(self):
        """Schedule the collection tasks based on interval.

        First asks executors to remove the jobs of tasks marked deleted,
        then assigns every task whose ``last_run_time`` is ``None`` to the
        executor named by ``CONF.host``. Both phases are best-effort: an
        exception raised inside a phase is logged and not re-raised.
        """
        try:
            # Remove jobs from scheduler when marked for delete
            filters = {'deleted': True}
            tasks = db.task_get_all(self.ctx, filters=filters)
            # Pass values as logger arguments so interpolation is deferred
            # until the record is actually emitted.
            LOG.debug("Total tasks found deleted "
                      "in this cycle:%s", len(tasks))
            for task in tasks:
                self.task_rpcapi.remove_job(self.ctx, task['id'],
                                            task['executor'])
        except Exception as e:
            LOG.error("Failed to remove periodic scheduling job , reason: %s.",
                      six.text_type(e))

        try:
            filters = {'last_run_time': None}
            tasks = db.task_get_all(self.ctx, filters=filters)
            LOG.debug("Distributing performance collection jobs: total "
                      "jobs to be handled:%s", len(tasks))
            for task in tasks:
                # Todo Get executor for the job
                executor = CONF.host
                # Record the chosen executor before sending the RPC cast.
                db.task_update(self.ctx, task['id'], {'executor': executor})
                LOG.info('Assigning executor for collection job for id: '
                         '%s', task['id'])
                self.task_rpcapi.assign_job(self.ctx, task['id'], executor)

                LOG.debug('Periodic collection job assigned for id: '
                          '%s ', task['id'])
        except Exception as e:
            LOG.error("Failed to distribute periodic collection, reason: %s.",
                      six.text_type(e))
        else:
            LOG.debug("Periodic job distribution completed.")

    @classmethod
    def job_interval(cls):
        """Return ``TelemetryCollection.PERIODIC_JOB_INTERVAL``."""
        return TelemetryCollection.PERIODIC_JOB_INTERVAL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
# Copyright 2021 The SODA Authors. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
""" | ||
Periodic task manager for metric collection tasks. | ||
""" | ||
from oslo_log import log | ||
from delfin import manager | ||
from delfin.task_manager.scheduler import schedule_manager | ||
from delfin.task_manager.scheduler.schedulers.telemetry.job_handler\ | ||
import JobHandler | ||
from delfin.task_manager.scheduler.schedulers.telemetry.job_handler\ | ||
import FailedJobHandler | ||
from delfin.task_manager.tasks import telemetry | ||
|
||
LOG = log.getLogger(__name__) | ||
|
||
|
||
class MetricsTaskManager(manager.Manager):
    """Manager exposing job scheduling operations for metric collection."""

    RPC_API_VERSION = '1.0'

    def __init__(self, service_name=None, *args, **kwargs):
        """Create the telemetry task, start the scheduler, schedule boot jobs.

        :param service_name: accepted but not used by this constructor.
        :param args: positional arguments forwarded to ``manager.Manager``.
        :param kwargs: keyword arguments forwarded to ``manager.Manager``.
        """
        self.telemetry_task = telemetry.TelemetryTask()
        super(MetricsTaskManager, self).__init__(*args, **kwargs)
        # The scheduler is started before the boot jobs are scheduled.
        schedule_manager.SchedulerManager().start()
        JobHandler.schedule_boot_jobs()

    def assign_job(self, context, task_id):
        """Schedule the job for ``task_id`` through its ``JobHandler``."""
        JobHandler.get_instance(context, task_id).schedule_job(task_id)

    def remove_job(self, context, task_id):
        """Remove the job for ``task_id`` through its ``JobHandler``."""
        JobHandler.get_instance(context, task_id).remove_job(task_id)

    def assign_failed_job(self, context, failed_task_id):
        """Schedule the failed job through its ``FailedJobHandler``."""
        handler = FailedJobHandler.get_instance(context, failed_task_id)
        handler.schedule_failed_job(failed_task_id)

    def remove_failed_job(self, context, failed_task_id):
        """Remove the failed job through its ``FailedJobHandler``."""
        handler = FailedJobHandler.get_instance(context, failed_task_id)
        handler.remove_failed_job(failed_task_id)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
# Copyright 2021 The SODA Authors. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
""" | ||
Client side of the metrics task manager RPC API. | ||
""" | ||
|
||
import oslo_messaging as messaging | ||
from oslo_config import cfg | ||
|
||
from delfin import rpc | ||
|
||
CONF = cfg.CONF | ||
|
||
|
||
class TaskAPI(object):
    """Client side of the metrics task rpc API.

    API version history:
        1.0 - Initial version.
    """

    RPC_API_VERSION = '1.0'

    def __init__(self):
        """Build the default RPC client targeting the ``CONF.host`` topic."""
        super(TaskAPI, self).__init__()
        self.target = messaging.Target(topic=CONF.host,
                                       version=self.RPC_API_VERSION)
        self.client = rpc.get_client(self.target,
                                     version_cap=self.RPC_API_VERSION)

    def get_client(self, topic):
        """Return an RPC client whose target is ``topic``.

        :param topic: messaging topic to address.
        :returns: whatever ``rpc.get_client`` returns for that target.
        """
        target = messaging.Target(topic=topic,
                                  version=self.RPC_API_VERSION)
        return rpc.get_client(target, version_cap=self.RPC_API_VERSION)

    def _cast_to_executor(self, context, executor, method, **kwargs):
        """Cast ``method`` with ``kwargs`` to the topic named by ``executor``.

        Shared by all public methods so the topic, version and fanout
        settings are defined in exactly one place.

        :param context: request context forwarded to ``cast``.
        :param executor: executor identifier; ``str(executor)`` is the topic.
        :param method: name of the remote method to invoke.
        :param kwargs: keyword arguments of the remote method.
        :returns: the return value of ``cast``.
        """
        topic = str(executor)
        # Use the class-level version rather than a repeated '1.0' literal so
        # the prepared context always agrees with get_client().
        call_context = self.get_client(topic).prepare(
            topic=topic, version=self.RPC_API_VERSION, fanout=True)
        return call_context.cast(context, method, **kwargs)

    def assign_job(self, context, task_id, executor):
        """Cast ``assign_job`` for ``task_id`` to ``executor``."""
        return self._cast_to_executor(context, executor, 'assign_job',
                                      task_id=task_id)

    def remove_job(self, context, task_id, executor):
        """Cast ``remove_job`` for ``task_id`` to ``executor``."""
        return self._cast_to_executor(context, executor, 'remove_job',
                                      task_id=task_id)

    def assign_failed_job(self, context, failed_task_id, executor):
        """Cast ``assign_failed_job`` for ``failed_task_id`` to ``executor``."""
        return self._cast_to_executor(context, executor, 'assign_failed_job',
                                      failed_task_id=failed_task_id)

    def remove_failed_job(self, context, failed_task_id, executor):
        """Cast ``remove_failed_job`` for ``failed_task_id`` to ``executor``."""
        return self._cast_to_executor(context, executor, 'remove_failed_job',
                                      failed_task_id=failed_task_id)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.