-
Notifications
You must be signed in to change notification settings - Fork 40
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
TARDIS-3783-Analyze and Improve Short Polling Implementation #16
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,9 +17,7 @@ | |
import six | ||
|
||
import time | ||
import json | ||
import certifi | ||
import urllib3 | ||
import random | ||
|
||
import opsgenie_sdk | ||
from opsgenie_sdk.models.success_data import SuccessData # noqa: F401,E501 | ||
|
@@ -56,7 +54,7 @@ class SuccessResponse(object): | |
} | ||
attribute_map['url'] = 'url' | ||
|
||
def __init__(self, request_id=None, took=0.0, result=None, data=None, url=None): # noqa: E501 | ||
def __init__(self, request_id=None, took=0.0, result=None, data=None, url=None, api_client=None): # noqa: E501 | ||
"""SuccessResponse - a model defined in OpenAPI""" # noqa: E501 | ||
|
||
self._request_id = None | ||
|
@@ -72,13 +70,11 @@ def __init__(self, request_id=None, took=0.0, result=None, data=None, url=None): | |
if data is not None: | ||
self.data = data | ||
|
||
if url is not None: | ||
self.url = url | ||
else: | ||
self.url = None | ||
self.url = url | ||
self.api_client = api_client | ||
|
||
self._id = None | ||
self.conf = opsgenie_sdk.configuration.Configuration() | ||
self.conf = api_client.configuration | ||
self.logger = self.conf.logger["package_logger"] | ||
@property | ||
def request_id(self): | ||
|
@@ -190,96 +186,67 @@ def url(self, url): | |
self._url = url | ||
|
||
def retrieve_result(self): | ||
conf = self.conf | ||
api_key = conf.api_key_prefix.get('Authorization') + ' ' + conf.api_key.get('Authorization') | ||
http = urllib3.PoolManager(cert_reqs='CERT_REQUIRED', ca_certs=certifi.where()) | ||
headers = { | ||
'Authorization': api_key | ||
} | ||
|
||
request_url = self._build_request_url_(conf.host) | ||
|
||
attempt_count = 0 | ||
MAX_NUMBER_OF_RETRIES = conf.short_polling_max_retries | ||
domain_api_client = self._get_domain_api_client_() | ||
MAX_NUMBER_OF_RETRIES = self.conf.short_polling_max_retries | ||
condition = True | ||
response = None | ||
attempt_count = 0 | ||
|
||
while condition: | ||
if attempt_count > 0: | ||
time.sleep(2 * attempt_count * 0.2) | ||
sleep_time = random.uniform(0, (0.2 * 2 ** attempt_count)) | ||
time.sleep(sleep_time) | ||
|
||
try: | ||
self.logger.debug( | ||
str(attempt_count + 1) + ' attempt to retrieve result with request_id: ' + str(self.request_id)) | ||
response = http.request(method='GET', url=request_url, headers=headers) | ||
except ApiException as err: | ||
print("Exception when calling success_response->retrieve_result: %s\n" % err) | ||
|
||
response_headers = response.headers | ||
if response_headers is not None: | ||
rate_limit_state = response_headers.get('X-RateLimit-State') | ||
status_code = response.status | ||
|
||
if rate_limit_state == 'THROTTLED': | ||
should_retry = True | ||
self.logger.debug('Should retry because X-RateLimit-State is THROTTLED') | ||
elif status_code == 429: | ||
should_retry = True | ||
self.logger.debug('Should retry because Status is 429') | ||
elif 500 <= status_code <= 599: | ||
should_retry = True | ||
self.logger.debug('Should retry because Status is ' + status_code) | ||
else: | ||
should_retry = False | ||
response_body = response.data | ||
response_body_decoded = json.loads(response_body) | ||
response_body_decoded_data = response_body_decoded.get('data') | ||
if response_body_decoded_data is not None: | ||
self._id = response_body_decoded_data.get('alertId') | ||
if self._id is None: | ||
self._id = response_body_decoded_data.get('incidentId') | ||
|
||
attempt_count += 1 | ||
|
||
if should_retry and attempt_count < MAX_NUMBER_OF_RETRIES: | ||
condition = True | ||
else: | ||
condition = False | ||
response = domain_api_client.get_incident_request_status(request_id=self.request_id) | ||
except AttributeError: | ||
response = domain_api_client.get_request_status(request_id=self.request_id) | ||
|
||
if response.data.is_success is True: | ||
should_retry = False | ||
|
||
if hasattr(response.data, "alert_id"): | ||
self._id = response.data.alert_id | ||
elif hasattr(response.data, "incident_id"): | ||
self._id = response.data.incident_id | ||
else: | ||
should_retry = True | ||
|
||
if should_retry and attempt_count < MAX_NUMBER_OF_RETRIES: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Off-by-one error: If incrementing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch. I will fix it right away. |
||
condition = True | ||
else: | ||
condition = False | ||
|
||
attempt_count += 1 | ||
|
||
return response | ||
|
||
def retrieve_resulting_action(self): | ||
conf = self.conf | ||
api_key = conf.api_key_prefix.get('Authorization') + ' ' + conf.api_key.get('Authorization') | ||
http = urllib3.PoolManager(cert_reqs='CERT_REQUIRED', ca_certs=certifi.where()) | ||
headers = { | ||
'Authorization': api_key | ||
} | ||
|
||
if self._id is None: | ||
self.retrieve_result() | ||
|
||
request_url = self._build_request_url_(conf.host) | ||
domain_api_client = self._get_domain_api_client_() | ||
|
||
try: | ||
response = http.request(method='GET', url=request_url, headers=headers) | ||
action = domain_api_client.get_alert(identifier=self._id) | ||
except AttributeError: | ||
action = domain_api_client.get_incident(identifier=self._id) | ||
|
||
return response | ||
except ApiException as err: | ||
print("Exception when calling success_response->retrieve_result: %s\n" % err) | ||
return action | ||
|
||
def _build_request_url_(self, host): | ||
def _get_domain_api_client_(self): | ||
if 'alerts' in self.url: | ||
return host + '/v2/alerts/requests/' + self.request_id | ||
return opsgenie_sdk.AlertApi(api_client=self.api_client) | ||
elif 'incident' in self.url: | ||
return host + '/v1/incidents/requests/' + self.request_id | ||
return opsgenie_sdk.IncidentApi(api_client=self.api_client) | ||
else: | ||
raise ApiException(reason='Short polling is not currently supported for this domain') | ||
|
||
@property | ||
def id(self): | ||
if self._id is None: | ||
self.retrieve_result() | ||
|
||
return self._id | ||
|
||
def to_dict(self): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -263,6 +263,7 @@ def request(self, method, url, query_params=None, headers=None, | |
exception = exceptions.build_exception(response=r) | ||
raise exception | ||
|
||
self.retries = -1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Tracking retry count here seems a bit hacky and error-prone. You've fixed one problem here but I think there are more lurking. For example, what happens when all retries are exhausted? This method raises and doesn't reset retries to -1 (and it has no way to know that retries have been exhausted in order to do the reset). So I think Seem like retry counts should be maintained at the level where the retrying is being done, and passed in somehow? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yup, I agree. In this particular pull request, I was more focused towards improving the short polling implementation but then I realized that the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Upon further investigation, I realized that this particular There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's fixed in the following pull request: #17 |
||
return r | ||
|
||
def GET(self, url, headers=None, query_params=None, _preload_content=True, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So polling delay is now starting at 0.1 and doubling on every attempt, with a uniformly distributed jitter of +/-100%
Combined with the short_polling_max_retries default value of 10 (and assuming the off-by-one bug I noted gets fixed) that gives an expected maximum aggregate sleep time of 51 seconds, and a worst-case of 102 seconds.
This plus any delays in calling get_[incident_]request_status would be the maximum blocking time for any client code calling
retrieve_result()
, directly or indirectly.Seems reasonable...