Merge pull request #261 from arXiv/feature/#257-classic-api-error-han…

…dling Feature/#257 classic api error handling
arXiv · Jan 23, 2020 · 8c4edf9 · 8c4edf9
2 parents c731cf5 + b03fa9c
commit 8c4edf9
Show file tree

Hide file tree

Showing 40 changed files with 1,103 additions and 712 deletions.
diff --git a/.gitignore b/.gitignore
@@ -101,6 +101,9 @@ ENV/
 .vscode
 settings.json
 
+# PyCharm
+.idea
+
 # mypy
 .mypy_cache/
 

diff --git a/bulk_index.py b/bulk_index.py
@@ -56,7 +56,7 @@ def populate(print_indexable: bool, paper_id: str, id_list: str,
                 if load_cache:
                     try:
                         this_meta = from_cache(cache_dir, paper_id)
-                    except RuntimeError as e:    # No document.
+                    except RuntimeError:    # No document.
                         pass
 
                 if this_meta:
@@ -67,7 +67,7 @@ def populate(print_indexable: bool, paper_id: str, id_list: str,
                 if len(chunk) == retrieve_chunk_size or i == last:
                     try:
                         new_meta = metadata.bulk_retrieve(chunk)
-                    except metadata.ConnectionFailed as e:  # Try again.
+                    except metadata.ConnectionFailed:  # Try again.
                         new_meta = metadata.bulk_retrieve(chunk)
                     # Add metadata to the cache.
                     key = lambda dm: dm.paper_id
@@ -93,8 +93,8 @@ def populate(print_indexable: bool, paper_id: str, id_list: str,
                     meta = []
                     index_bar.update(i)
 
-    except Exception as e:
-        raise RuntimeError('Populate failed: %s' % str(e)) from e
+    except Exception as ex:
+        raise RuntimeError('Populate failed: %s' % str(ex)) from ex
 
     finally:
         click.echo(f"Indexed {index_count} documents in total")
@@ -162,8 +162,8 @@ def to_cache(cache_dir: str, arxiv_id: str, docmeta: List[DocMeta]) -> None:
     try:
         with open(cache_path, 'w') as f:
             json.dump([asdict(dm) for dm in docmeta], f)
-    except Exception as e:
-        raise RuntimeError(str(e)) from e
+    except Exception as ex:
+        raise RuntimeError(str(ex)) from ex
 
 
 def load_id_list(path: str) -> List[str]:

diff --git a/search/agent/consumer.py b/search/agent/consumer.py
@@ -67,22 +67,22 @@ def _get_metadata(self, arxiv_id: str) -> DocMeta:
             docmeta: DocMeta = retry_call(metadata.retrieve, (arxiv_id,),
                                           exceptions=metadata.ConnectionFailed,
                                           tries=2)
-        except metadata.ConnectionFailed as e:
+        except metadata.ConnectionFailed as ex:
             # Things really are looking bad. There is no need to keep
             # trying with subsequent records, so let's abort entirely.
             logger.error('%s: second attempt failed, giving up', arxiv_id)
             raise IndexingFailed(
                 'Indexing failed; metadata endpoint could not be reached.'
-            ) from e
-        except metadata.RequestFailed as e:
+            ) from ex
+        except metadata.RequestFailed as ex:
             logger.error(f'{arxiv_id}: request failed')
-            raise DocumentFailed('Request to metadata service failed') from e
-        except metadata.BadResponse as e:
+            raise DocumentFailed('Request to metadata service failed') from ex
+        except metadata.BadResponse as ex:
             logger.error(f'{arxiv_id}: bad response from metadata service')
-            raise DocumentFailed('Bad response from metadata service') from e
-        except Exception as e:
-            logger.error(f'{arxiv_id}: unhandled error, metadata service: {e}')
-            raise IndexingFailed('Unhandled exception') from e
+            raise DocumentFailed('Bad response from metadata service') from ex
+        except Exception as ex:
+            logger.error(f'{arxiv_id}: unhandled error, metadata service: {ex}')
+            raise IndexingFailed('Unhandled exception') from ex
         return docmeta
 
     def _get_bulk_metadata(self, arxiv_ids: List[str]) -> List[DocMeta]:
@@ -116,20 +116,20 @@ def _get_bulk_metadata(self, arxiv_ids: List[str]) -> List[DocMeta]:
             meta = retry_call(metadata.bulk_retrieve, (arxiv_ids,),
                               exceptions=metadata.ConnectionFailed,
                               tries=2)
-        except metadata.ConnectionFailed as e:
+        except metadata.ConnectionFailed as ex:
             # Things really are looking bad. There is no need to keep
             # trying with subsequent records, so let's abort entirely.
             logger.error('%s: second attempt failed, giving up', arxiv_ids)
-            raise IndexingFailed('Metadata endpoint not available') from e
-        except metadata.RequestFailed as e:
+            raise IndexingFailed('Metadata endpoint not available') from ex
+        except metadata.RequestFailed as ex:
             logger.error('%s: request failed', arxiv_ids)
-            raise DocumentFailed('Request to metadata service failed') from e
-        except metadata.BadResponse as e:
+            raise DocumentFailed('Request to metadata service failed') from ex
+        except metadata.BadResponse as ex:
             logger.error('%s: bad response from metadata service', arxiv_ids)
-            raise DocumentFailed('Bad response from metadata service') from e
-        except Exception as e:
-            logger.error('%s: unhandled error, metadata svc: %s', arxiv_ids, e)
-            raise IndexingFailed('Unhandled exception') from e
+            raise DocumentFailed('Bad response from metadata service') from ex
+        except Exception as ex:
+            logger.error('%s: unhandled error, metadata svc: %s', arxiv_ids, ex)
+            raise IndexingFailed('Unhandled exception') from ex
         return meta
 
     @staticmethod
@@ -156,10 +156,10 @@ def _transform_to_document(docmeta: DocMeta) -> Document:
         """
         try:
             document = transform.to_search_document(docmeta)
-        except Exception as e:
+        except Exception as ex:
             # At the moment we don't have any special exceptions.
-            logger.error('unhandled exception during transform: %s', e)
-            raise DocumentFailed('Could not transform document') from e
+            logger.error('unhandled exception during transform: %s', ex)
+            raise DocumentFailed('Could not transform document') from ex
 
         return document
 
@@ -182,11 +182,11 @@ def _add_to_index(document: Document) -> None:
         try:
             retry_call(index.SearchSession.add_document, (document,),
                        exceptions=index.IndexConnectionError, tries=2)
-        except index.IndexConnectionError as e:
-            raise IndexingFailed('Could not index document') from e
-        except Exception as e:
-            logger.error(f'Unhandled exception from index service: {e}')
-            raise IndexingFailed('Unhandled exception') from e
+        except index.IndexConnectionError as ex:
+            raise IndexingFailed('Could not index document') from ex
+        except Exception as ex:
+            logger.error(f'Unhandled exception from index service: {ex}')
+            raise IndexingFailed('Unhandled exception') from ex
 
     @staticmethod
     def _bulk_add_to_index(documents: List[Document]) -> None:
@@ -207,11 +207,11 @@ def _bulk_add_to_index(documents: List[Document]) -> None:
         try:
             retry_call(index.SearchSession.bulk_add_documents, (documents,),
                        exceptions=index.IndexConnectionError, tries=2)
-        except index.IndexConnectionError as e:
-            raise IndexingFailed('Could not bulk index documents') from e
-        except Exception as e:
-            logger.error(f'Unhandled exception from index service: {e}')
-            raise IndexingFailed('Unhandled exception') from e
+        except index.IndexConnectionError as ex:
+            raise IndexingFailed('Could not bulk index documents') from ex
+        except Exception as ex:
+            logger.error(f'Unhandled exception from index service: {ex}')
+            raise IndexingFailed('Unhandled exception') from ex
 
     def index_paper(self, arxiv_id: str) -> None:
         """
@@ -254,10 +254,10 @@ def index_papers(self, arxiv_ids: List[str]) -> None:
                 documents.append(document)
             logger.debug('add to index in bulk')
             MetadataRecordProcessor._bulk_add_to_index(documents)
-        except (DocumentFailed, IndexingFailed) as e:
+        except (DocumentFailed, IndexingFailed) as ex:
             # We just pass these along so that process_record() can keep track.
-            logger.debug(f'{arxiv_ids}: Document failed: {e}')
-            raise e
+            logger.debug(f'{arxiv_ids}: Document failed: {ex}')
+            raise ex
 
     def process_record(self, record: dict) -> None:
         """
@@ -285,18 +285,18 @@ def process_record(self, record: dict) -> None:
 
         try:
             deserialized = json.loads(record['Data'].decode('utf-8'))
-        except json.decoder.JSONDecodeError as e:
-            logger.error("Error while deserializing data %s", e)
+        except json.decoder.JSONDecodeError as ex:
+            logger.error("Error while deserializing data %s", ex)
             logger.error("Data payload: %s", record['Data'])
             raise DocumentFailed('Could not deserialize record data')
             # return   # Don't bring down the whole batch.
 
         try:
             arxiv_id: str = deserialized.get('document_id')
             self.index_paper(arxiv_id)
-        except DocumentFailed as e:
-            logger.debug('%s: failed to index document: %s', arxiv_id, e)
+        except DocumentFailed as ex:
+            logger.debug('%s: failed to index document: %s', arxiv_id, ex)
             self._error_count += 1
-        except IndexingFailed as e:
-            logger.error('Indexing failed: %s', e)
+        except IndexingFailed as ex:
+            logger.error('Indexing failed: %s', ex)
             raise
diff --git a/search/agent/tests/test_record_processor.py b/search/agent/tests/test_record_processor.py
@@ -122,8 +122,8 @@ def test_add_document_succeeds(self, mock_index, mock_client_factory):
         processor = consumer.MetadataRecordProcessor(*self.args)
         try:
             processor._add_to_index(Document())
-        except Exception as e:
-            self.fail(e)
+        except Exception as ex:
+            self.fail(ex)
         mock_index.add_document.assert_called_once()
 
     @mock.patch('boto3.client')
@@ -178,8 +178,8 @@ def test_bulk_add_documents_succeeds(self, mock_index,
         processor = consumer.MetadataRecordProcessor(*self.args)
         try:
             processor._bulk_add_to_index([Document()])
-        except Exception as e:
-            self.fail(e)
+        except Exception as ex:
+            self.fail(ex)
         mock_index.bulk_add_documents.assert_called_once()
 
     @mock.patch('boto3.client')

diff --git a/search/controllers/advanced/__init__.py b/search/controllers/advanced/__init__.py
@@ -116,30 +116,30 @@ def search(request_params: MultiDict) -> Response:
                 #  template rendering, so they get added directly to the
                 #  response content. asdict(
                 response_data.update(SearchSession.search(q))  # type: ignore
-            except index.IndexConnectionError as e:
+            except index.IndexConnectionError as ex:
                 # There was a (hopefully transient) connection problem. Either
                 #  this will clear up relatively quickly (next request), or
                 #  there is a more serious outage.
-                logger.error('IndexConnectionError: %s', e)
+                logger.error('IndexConnectionError: %s', ex)
                 raise InternalServerError(
                     "There was a problem connecting to the search index. This "
                     "is quite likely a transient issue, so please try your "
                     "search again. If this problem persists, please report it "
                     "to [email protected]."
-                ) from e
-            except index.QueryError as e:
+                ) from ex
+            except index.QueryError as ex:
                 # Base exception routers should pick this up and show bug page.
-                logger.error('QueryError: %s', e)
+                logger.error('QueryError: %s', ex)
                 raise InternalServerError(
                     "There was a problem executing your query. Please try "
                     "your search again.  If this problem persists, please "
                     "report it to [email protected]."
-                ) from e
-            except index.OutsideAllowedRange as e:
+                ) from ex
+            except index.OutsideAllowedRange as ex:
                 raise BadRequest(
                     "Hello clever friend. You can't get results in that range"
                     " right now."
-                ) from e
+                ) from ex
             response_data['query'] = q
         else:
             logger.debug('form is invalid: %s', str(form.errors))

diff --git a/search/controllers/advanced/tests.py b/search/controllers/advanced/tests.py
@@ -161,8 +161,8 @@ def _raiseQueryError(*args, **kwargs):
         with self.assertRaises(InternalServerError):
             try:
                 response_data, code, headers = advanced.search(request_data)
-            except QueryError as e:
-                self.fail("QueryError should be handled (caught %s)" % e)
+            except QueryError as ex:
+                self.fail("QueryError should be handled (caught %s)" % ex)
 
         self.assertEqual(mock_index.search.call_count, 1,
                          "A search should be attempted")
-Original file line number
+Diff line change
@@ Expand Up / @@ -101,6 +101,9 @@ ENV/ @@
     .vscode
     settings.json
+    # PyCharm
+    .idea
     # mypy
     .mypy_cache/
@@ Expand Down @@