Skip to content

Commit

Permalink
Merge pull request #261 from arXiv/feature/#257-classic-api-error-han…
Browse files Browse the repository at this point in the history
…dling

Feature/#257 classic api error handling
  • Loading branch information
mhl10 authored Jan 23, 2020
2 parents c731cf5 + b03fa9c commit 8c4edf9
Show file tree
Hide file tree
Showing 40 changed files with 1,103 additions and 712 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ ENV/
.vscode
settings.json

# PyCharm
.idea

# mypy
.mypy_cache/

Expand Down
12 changes: 6 additions & 6 deletions bulk_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def populate(print_indexable: bool, paper_id: str, id_list: str,
if load_cache:
try:
this_meta = from_cache(cache_dir, paper_id)
except RuntimeError as e: # No document.
except RuntimeError: # No document.
pass

if this_meta:
Expand All @@ -67,7 +67,7 @@ def populate(print_indexable: bool, paper_id: str, id_list: str,
if len(chunk) == retrieve_chunk_size or i == last:
try:
new_meta = metadata.bulk_retrieve(chunk)
except metadata.ConnectionFailed as e: # Try again.
except metadata.ConnectionFailed: # Try again.
new_meta = metadata.bulk_retrieve(chunk)
# Add metadata to the cache.
key = lambda dm: dm.paper_id
Expand All @@ -93,8 +93,8 @@ def populate(print_indexable: bool, paper_id: str, id_list: str,
meta = []
index_bar.update(i)

except Exception as e:
raise RuntimeError('Populate failed: %s' % str(e)) from e
except Exception as ex:
raise RuntimeError('Populate failed: %s' % str(ex)) from ex

finally:
click.echo(f"Indexed {index_count} documents in total")
Expand Down Expand Up @@ -162,8 +162,8 @@ def to_cache(cache_dir: str, arxiv_id: str, docmeta: List[DocMeta]) -> None:
try:
with open(cache_path, 'w') as f:
json.dump([asdict(dm) for dm in docmeta], f)
except Exception as e:
raise RuntimeError(str(e)) from e
except Exception as ex:
raise RuntimeError(str(ex)) from ex


def load_id_list(path: str) -> List[str]:
Expand Down
80 changes: 40 additions & 40 deletions search/agent/consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,22 +67,22 @@ def _get_metadata(self, arxiv_id: str) -> DocMeta:
docmeta: DocMeta = retry_call(metadata.retrieve, (arxiv_id,),
exceptions=metadata.ConnectionFailed,
tries=2)
except metadata.ConnectionFailed as e:
except metadata.ConnectionFailed as ex:
# Things really are looking bad. There is no need to keep
# trying with subsequent records, so let's abort entirely.
logger.error('%s: second attempt failed, giving up', arxiv_id)
raise IndexingFailed(
'Indexing failed; metadata endpoint could not be reached.'
) from e
except metadata.RequestFailed as e:
) from ex
except metadata.RequestFailed as ex:
logger.error(f'{arxiv_id}: request failed')
raise DocumentFailed('Request to metadata service failed') from e
except metadata.BadResponse as e:
raise DocumentFailed('Request to metadata service failed') from ex
except metadata.BadResponse as ex:
logger.error(f'{arxiv_id}: bad response from metadata service')
raise DocumentFailed('Bad response from metadata service') from e
except Exception as e:
logger.error(f'{arxiv_id}: unhandled error, metadata service: {e}')
raise IndexingFailed('Unhandled exception') from e
raise DocumentFailed('Bad response from metadata service') from ex
except Exception as ex:
logger.error(f'{arxiv_id}: unhandled error, metadata service: {ex}')
raise IndexingFailed('Unhandled exception') from ex
return docmeta

def _get_bulk_metadata(self, arxiv_ids: List[str]) -> List[DocMeta]:
Expand Down Expand Up @@ -116,20 +116,20 @@ def _get_bulk_metadata(self, arxiv_ids: List[str]) -> List[DocMeta]:
meta = retry_call(metadata.bulk_retrieve, (arxiv_ids,),
exceptions=metadata.ConnectionFailed,
tries=2)
except metadata.ConnectionFailed as e:
except metadata.ConnectionFailed as ex:
# Things really are looking bad. There is no need to keep
# trying with subsequent records, so let's abort entirely.
logger.error('%s: second attempt failed, giving up', arxiv_ids)
raise IndexingFailed('Metadata endpoint not available') from e
except metadata.RequestFailed as e:
raise IndexingFailed('Metadata endpoint not available') from ex
except metadata.RequestFailed as ex:
logger.error('%s: request failed', arxiv_ids)
raise DocumentFailed('Request to metadata service failed') from e
except metadata.BadResponse as e:
raise DocumentFailed('Request to metadata service failed') from ex
except metadata.BadResponse as ex:
logger.error('%s: bad response from metadata service', arxiv_ids)
raise DocumentFailed('Bad response from metadata service') from e
except Exception as e:
logger.error('%s: unhandled error, metadata svc: %s', arxiv_ids, e)
raise IndexingFailed('Unhandled exception') from e
raise DocumentFailed('Bad response from metadata service') from ex
except Exception as ex:
logger.error('%s: unhandled error, metadata svc: %s', arxiv_ids, ex)
raise IndexingFailed('Unhandled exception') from ex
return meta

@staticmethod
Expand All @@ -156,10 +156,10 @@ def _transform_to_document(docmeta: DocMeta) -> Document:
"""
try:
document = transform.to_search_document(docmeta)
except Exception as e:
except Exception as ex:
# At the moment we don't have any special exceptions.
logger.error('unhandled exception during transform: %s', e)
raise DocumentFailed('Could not transform document') from e
logger.error('unhandled exception during transform: %s', ex)
raise DocumentFailed('Could not transform document') from ex

return document

Expand All @@ -182,11 +182,11 @@ def _add_to_index(document: Document) -> None:
try:
retry_call(index.SearchSession.add_document, (document,),
exceptions=index.IndexConnectionError, tries=2)
except index.IndexConnectionError as e:
raise IndexingFailed('Could not index document') from e
except Exception as e:
logger.error(f'Unhandled exception from index service: {e}')
raise IndexingFailed('Unhandled exception') from e
except index.IndexConnectionError as ex:
raise IndexingFailed('Could not index document') from ex
except Exception as ex:
logger.error(f'Unhandled exception from index service: {ex}')
raise IndexingFailed('Unhandled exception') from ex

@staticmethod
def _bulk_add_to_index(documents: List[Document]) -> None:
Expand All @@ -207,11 +207,11 @@ def _bulk_add_to_index(documents: List[Document]) -> None:
try:
retry_call(index.SearchSession.bulk_add_documents, (documents,),
exceptions=index.IndexConnectionError, tries=2)
except index.IndexConnectionError as e:
raise IndexingFailed('Could not bulk index documents') from e
except Exception as e:
logger.error(f'Unhandled exception from index service: {e}')
raise IndexingFailed('Unhandled exception') from e
except index.IndexConnectionError as ex:
raise IndexingFailed('Could not bulk index documents') from ex
except Exception as ex:
logger.error(f'Unhandled exception from index service: {ex}')
raise IndexingFailed('Unhandled exception') from ex

def index_paper(self, arxiv_id: str) -> None:
"""
Expand Down Expand Up @@ -254,10 +254,10 @@ def index_papers(self, arxiv_ids: List[str]) -> None:
documents.append(document)
logger.debug('add to index in bulk')
MetadataRecordProcessor._bulk_add_to_index(documents)
except (DocumentFailed, IndexingFailed) as e:
except (DocumentFailed, IndexingFailed) as ex:
# We just pass these along so that process_record() can keep track.
logger.debug(f'{arxiv_ids}: Document failed: {e}')
raise e
logger.debug(f'{arxiv_ids}: Document failed: {ex}')
raise ex

def process_record(self, record: dict) -> None:
"""
Expand Down Expand Up @@ -285,18 +285,18 @@ def process_record(self, record: dict) -> None:

try:
deserialized = json.loads(record['Data'].decode('utf-8'))
except json.decoder.JSONDecodeError as e:
logger.error("Error while deserializing data %s", e)
except json.decoder.JSONDecodeError as ex:
logger.error("Error while deserializing data %s", ex)
logger.error("Data payload: %s", record['Data'])
raise DocumentFailed('Could not deserialize record data')
# return # Don't bring down the whole batch.

try:
arxiv_id: str = deserialized.get('document_id')
self.index_paper(arxiv_id)
except DocumentFailed as e:
logger.debug('%s: failed to index document: %s', arxiv_id, e)
except DocumentFailed as ex:
logger.debug('%s: failed to index document: %s', arxiv_id, ex)
self._error_count += 1
except IndexingFailed as e:
logger.error('Indexing failed: %s', e)
except IndexingFailed as ex:
logger.error('Indexing failed: %s', ex)
raise
8 changes: 4 additions & 4 deletions search/agent/tests/test_record_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,8 @@ def test_add_document_succeeds(self, mock_index, mock_client_factory):
processor = consumer.MetadataRecordProcessor(*self.args)
try:
processor._add_to_index(Document())
except Exception as e:
self.fail(e)
except Exception as ex:
self.fail(ex)
mock_index.add_document.assert_called_once()

@mock.patch('boto3.client')
Expand Down Expand Up @@ -178,8 +178,8 @@ def test_bulk_add_documents_succeeds(self, mock_index,
processor = consumer.MetadataRecordProcessor(*self.args)
try:
processor._bulk_add_to_index([Document()])
except Exception as e:
self.fail(e)
except Exception as ex:
self.fail(ex)
mock_index.bulk_add_documents.assert_called_once()

@mock.patch('boto3.client')
Expand Down
16 changes: 8 additions & 8 deletions search/controllers/advanced/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,30 +116,30 @@ def search(request_params: MultiDict) -> Response:
# template rendering, so they get added directly to the
# response content. asdict(
response_data.update(SearchSession.search(q)) # type: ignore
except index.IndexConnectionError as e:
except index.IndexConnectionError as ex:
# There was a (hopefully transient) connection problem. Either
# this will clear up relatively quickly (next request), or
# there is a more serious outage.
logger.error('IndexConnectionError: %s', e)
logger.error('IndexConnectionError: %s', ex)
raise InternalServerError(
"There was a problem connecting to the search index. This "
"is quite likely a transient issue, so please try your "
"search again. If this problem persists, please report it "
"to [email protected]."
) from e
except index.QueryError as e:
) from ex
except index.QueryError as ex:
# Base exception routers should pick this up and show bug page.
logger.error('QueryError: %s', e)
logger.error('QueryError: %s', ex)
raise InternalServerError(
"There was a problem executing your query. Please try "
"your search again. If this problem persists, please "
"report it to [email protected]."
) from e
except index.OutsideAllowedRange as e:
) from ex
except index.OutsideAllowedRange as ex:
raise BadRequest(
"Hello clever friend. You can't get results in that range"
" right now."
) from e
) from ex
response_data['query'] = q
else:
logger.debug('form is invalid: %s', str(form.errors))
Expand Down
4 changes: 2 additions & 2 deletions search/controllers/advanced/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,8 @@ def _raiseQueryError(*args, **kwargs):
with self.assertRaises(InternalServerError):
try:
response_data, code, headers = advanced.search(request_data)
except QueryError as e:
self.fail("QueryError should be handled (caught %s)" % e)
except QueryError as ex:
self.fail("QueryError should be handled (caught %s)" % ex)

self.assertEqual(mock_index.search.call_count, 1,
"A search should be attempted")
Expand Down
Loading

0 comments on commit 8c4edf9

Please sign in to comment.