Merge pull request #239 from arXiv/task/ARXIVNG-1516

Atom/XML Serializer
arXiv · Jun 11, 2019 · b1848d4 · b1848d4
2 parents d50adc1 + 24ab5f9
commit b1848d4
Show file tree

Hide file tree

Showing 7 changed files with 387 additions and 32 deletions.
diff --git a/Pipfile b/Pipfile
@@ -17,6 +17,7 @@ dataclasses = "==0.4"
 docutils = "==0.14"
 elasticsearch = "==6.2.0"
 elasticsearch-dsl = "==6.3.1"
+feedgen = "==0.7.0"
 flask = "==1.0.2"
 "flask-s3" = "==0.3.3"
 idna = "==2.6"

diff --git a/Pipfile.lock b/Pipfile.lock
diff --git a/search/controllers/api/__init__.py b/search/controllers/api/__init__.py
@@ -145,6 +145,15 @@ def classic_query(params: MultiDict) -> Tuple[Dict[str, Any], int, Dict[str, Any
         params['query'] = raw_query
         del params['search_query']
 
+        params.add('include', 'abstract')
+        params.add('include', 'submitted_date')
+        params.add('include', 'updated_date')
+        params.add('include', 'comments')
+        params.add('include', 'journal_ref')
+        params.add('include', 'doi')
+        params.add('include', 'primary_classification')
+        params.add('include', 'secondary_classification')
+        params.add('include', 'authors')
         # pass to normal search, which will handle parsing
         data, _, _ = search(params) # type: ignore
 

diff --git a/search/process/transform.py b/search/process/transform.py
@@ -49,6 +49,8 @@ def _transformAuthor(author: dict) -> Optional[Dict]:
     author['initials'] = " ".join([pt[0] for pt in author['first_name'].split() if pt])
     name_parts = author['first_name'].split() + author['last_name'].split()
     author['full_name_initialized'] = ' '.join([part[0] for part in name_parts[:-1]] + [name_parts[-1]])
+    # TODO: add handling for arxiv:affiliation
+
     return author
 
 

diff --git a/search/routes/api/atom_extensions.py b/search/routes/api/atom_extensions.py
@@ -0,0 +1,280 @@
+"""Classes derived from the Feedgen extension classes."""
+
+from typing import Any, Dict
+from feedgen.ext.base import BaseEntryExtension, BaseExtension
+from feedgen.entry import FeedEntry
+from feedgen.feed import FeedGenerator
+from lxml import etree
+
+
+class OpenSearchExtension(BaseExtension):
+    """Feedgen feed extension that adds OpenSearch paging metadata to Atom."""
+
+    _OPENSEARCH_NS = 'http://a9.com/-/spec/opensearch/1.1/'  # used by all tags
+
+    def __init__(self) -> None:
+        """Start with every value unset; unset values are never emitted."""
+        self.__opensearch_totalResults = None
+        self.__opensearch_startIndex = None
+        self.__opensearch_itemsPerPage = None
+
+    def extend_atom(self, atom_feed: etree._Element) -> etree._Element:
+        """
+        Append the OpenSearch elements that have been set to the Atom feed.
+
+        Parameters
+        ----------
+        atom_feed
+            The feed's lxml root element, used as the ``SubElement`` parent.
+
+        Returns
+        -------
+        etree._Element
+            ``atom_feed`` with ``itemsPerPage``, ``totalResults`` and
+            ``startIndex`` children appended, in that order, when set.
+
+        """
+        fields = (
+            ('itemsPerPage', self.__opensearch_itemsPerPage),
+            ('totalResults', self.__opensearch_totalResults),
+            ('startIndex', self.__opensearch_startIndex),
+        )
+        for tag, value in fields:
+            if value is not None:
+                qname = '{%s}%s' % (self._OPENSEARCH_NS, tag)
+                etree.SubElement(atom_feed, qname).text = value
+
+        return atom_feed
+
+    @staticmethod
+    def extend_rss(rss_feed: etree._Element) -> etree._Element:
+        """
+        Return the RSS feed unchanged; nothing is added for RSS.
+
+        Parameters
+        ----------
+        rss_feed
+            The RSS feed element handed to the extension.
+
+        Returns
+        -------
+        etree._Element
+            The ``rss_feed`` argument, untouched.
+
+        """
+        return rss_feed
+
+    @staticmethod
+    def extend_ns() -> Dict[str, str]:
+        """
+        Declare the XML namespace used by this extension.
+
+        Returns
+        -------
+        Dict[str, str]
+            The ``opensearch`` prefix mapped to its namespace URI.
+
+        """
+        return {'opensearch': OpenSearchExtension._OPENSEARCH_NS}
+
+    def totalResults(self, text: str) -> None:
+        """Set the totalResults value; it is stored as ``str(text)``."""
+        self.__opensearch_totalResults = str(text)
+
+    def startIndex(self, text: str) -> None:
+        """Set the startIndex value; it is stored as ``str(text)``."""
+        self.__opensearch_startIndex = str(text)
+
+    def itemsPerPage(self, text: str) -> None:
+        """Set the itemsPerPage value; it is stored as ``str(text)``."""
+        self.__opensearch_itemsPerPage = str(text)
+
+
+class ArxivExtension(BaseExtension):
+    """Feed-level Feedgen extension that registers the ``arxiv`` namespace."""
+
+    def __init__(self: BaseExtension) -> None:
+        """Create the extension; it keeps no state."""
+        return None
+
+    @staticmethod
+    def extend_atom(atom_feed: FeedGenerator) -> FeedGenerator:
+        """
+        Hand the Atom feed back without modifying it.
+
+        Parameters
+        ----------
+        atom_feed
+            The Atom feed object supplied to the extension.
+
+        Returns
+        -------
+        FeedGenerator
+            ``atom_feed`` itself, exactly as received.
+
+        """
+        return atom_feed
+
+    @staticmethod
+    def extend_rss(rss_feed: FeedGenerator) -> FeedGenerator:
+        """
+        Hand the RSS feed back without modifying it.
+
+        Parameters
+        ----------
+        rss_feed
+            The RSS feed object supplied to the extension.
+
+        Returns
+        -------
+        FeedGenerator
+            ``rss_feed`` itself, exactly as received.
+
+        """
+        return rss_feed
+
+    @staticmethod
+    def extend_ns() -> Dict[str, str]:
+        """
+        Declare the XML namespace this extension contributes.
+
+        Returns
+        -------
+        Dict[str, str]
+            The ``arxiv`` prefix mapped to its namespace URI.
+        """
+        namespaces = {'arxiv': 'http://arxiv.org/schemas/atom'}
+        return namespaces
+
+
+class ArxivEntryExtension(BaseEntryExtension):
+    """Feedgen entry extension that adds arXiv elements to Atom entries."""
+
+    _ARXIV_NS = 'http://arxiv.org/schemas/atom'  # namespace of the arxiv:* tags
+
+    def __init__(self) -> None:
+        """Start with no arXiv metadata and an empty author list."""
+        self.__arxiv_comment = None
+        self.__arxiv_primary_category = None
+        self.__arxiv_doi = None
+        self.__arxiv_journal_ref = None
+        self.__arxiv_authors = []
+
+    def extend_atom(self, entry: etree._Element) -> etree._Element:
+        """
+        Append the arXiv elements that have been set to the Atom entry.
+
+        Parameters
+        ----------
+        entry
+            The entry's lxml element, used as the ``SubElement`` parent.
+
+        Returns
+        -------
+        etree._Element
+            ``entry`` with the comment, primary category, journal reference,
+            DOI and author elements appended, in that order.
+
+        """
+        ns = '{%s}' % self._ARXIV_NS
+
+        if self.__arxiv_comment:
+            etree.SubElement(entry, ns + 'comment').text = self.__arxiv_comment
+
+        if self.__arxiv_primary_category:
+            primary = etree.SubElement(entry, ns + 'primary_category')
+            primary.attrib['term'] = self.__arxiv_primary_category
+
+        if self.__arxiv_journal_ref:
+            etree.SubElement(entry, ns + 'journal_ref').text = self.__arxiv_journal_ref
+
+        for doi in self.__arxiv_doi or ():
+            etree.SubElement(entry, ns + 'doi').text = doi
+
+        for author in self.__arxiv_authors:
+            author_element = etree.SubElement(entry, 'author')
+            etree.SubElement(author_element, 'name').text = author['name']
+            # Affiliation data is optional: an author without the key (or with
+            # a ``None`` value) is written with a name only.
+            for affiliation in author.get('affiliation') or ():
+                affiliation_element = etree.SubElement(author_element, ns + 'affiliation')
+                affiliation_element.text = affiliation
+
+        return entry
+
+    @staticmethod
+    def extend_rss(entry: etree._Element) -> etree._Element:
+        """
+        Return the RSS entry unchanged; nothing is added for RSS.
+
+        Parameters
+        ----------
+        entry
+            The RSS entry element handed to the extension.
+
+        Returns
+        -------
+        etree._Element
+            The ``entry`` argument, untouched.
+
+        """
+        return entry
+
+    def comment(self, text: str) -> None:
+        """
+        Assign the comment value to this entry.
+
+        Parameters
+        ----------
+        text
+            The new comment text; an empty value means no element is written.
+
+        """
+        self.__arxiv_comment = text
+
+    def primary_category(self, text: str) -> None:
+        """
+        Assign the primary_category value to this entry.
+
+        Parameters
+        ----------
+        text
+            The category name, written as the element's ``term`` attribute.
+
+        """
+        self.__arxiv_primary_category = text
+
+    def journal_ref(self, text: str) -> None:
+        """
+        Assign the journal_ref value to this entry.
+
+        Parameters
+        ----------
+        text
+            The new journal_ref text; an empty value means no element.
+
+        """
+        self.__arxiv_journal_ref = text
+
+    def doi(self, list: Dict[str, str]) -> None:
+        """
+        Assign the DOI values for this entry.
+
+        Parameters
+        ----------
+        list
+            The DOIs; each item iterated from it becomes one ``arxiv:doi``.
+            (The name shadows the builtin but is kept for keyword callers.)
+        """
+        self.__arxiv_doi = list
+
+    def author(self, data: Dict[str, Any]) -> None:
+        """
+        Add an author to this entry.
+
+        Parameters
+        ----------
+        data
+            Must hold ``name``; ``affiliation`` is an optional iterable.
+        """
+        self.__arxiv_authors.append(data)
diff --git a/search/routes/api/classic.py b/search/routes/api/classic.py
@@ -28,7 +28,7 @@ def query() -> Response:
     # requested = request.accept_mimetypes.best_match([JSON, ATOM_XML])
     # if requested == ATOM_XML:
     #     return serialize.as_atom(data), status, headers
-    response_data = serialize.as_json(data['results'], query=data['query'])
+    response_data = serialize.as_atom(data['results'], query=data['query'])
     return response_data, status_code, headers
 
 @blueprint.route('<arxiv:paper_id>v<string:version>', methods=['GET'])