Skip to content

Commit

Permalink
Merge pull request #239 from arXiv/task/ARXIVNG-1516
Browse files Browse the repository at this point in the history
Atom/XML Serializer
  • Loading branch information
JaimieMurdock authored Jun 11, 2019
2 parents d50adc1 + 24ab5f9 commit b1848d4
Show file tree
Hide file tree
Showing 7 changed files with 387 additions and 32 deletions.
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ dataclasses = "==0.4"
docutils = "==0.14"
elasticsearch = "==6.2.0"
elasticsearch-dsl = "==6.3.1"
feedgen = "==0.7.0"
flask = "==1.0.2"
"flask-s3" = "==0.3.3"
idna = "==2.6"
Expand Down
13 changes: 10 additions & 3 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions search/controllers/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,15 @@ def classic_query(params: MultiDict) -> Tuple[Dict[str, Any], int, Dict[str, Any
params['query'] = raw_query
del params['search_query']

params.add('include', 'abstract')
params.add('include', 'submitted_date')
params.add('include', 'updated_date')
params.add('include', 'comments')
params.add('include', 'journal_ref')
params.add('include', 'doi')
params.add('include', 'primary_classification')
params.add('include', 'secondary_classification')
params.add('include', 'authors')
# pass to normal search, which will handle parsing
data, _, _ = search(params) # type: ignore

Expand Down
2 changes: 2 additions & 0 deletions search/process/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ def _transformAuthor(author: dict) -> Optional[Dict]:
author['initials'] = " ".join([pt[0] for pt in author['first_name'].split() if pt])
name_parts = author['first_name'].split() + author['last_name'].split()
author['full_name_initialized'] = ' '.join([part[0] for part in name_parts[:-1]] + [name_parts[-1]])
# TODO: add handling for arxiv:affiliation

return author


Expand Down
280 changes: 280 additions & 0 deletions search/routes/api/atom_extensions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,280 @@
"""Classes derived from the Feedgen extension classes."""

from typing import Any, Dict
from feedgen.ext.base import BaseEntryExtension, BaseExtension
from feedgen.entry import FeedEntry
from feedgen.feed import FeedGenerator
from lxml import etree


class OpenSearchExtension(BaseExtension):
    """Feedgen feed-level extension carrying OpenSearch result metadata."""

    def __init__(self: BaseExtension) -> None:
        """Create the extension with none of the OpenSearch values set."""
        self.__opensearch_startIndex = None
        self.__opensearch_itemsPerPage = None
        self.__opensearch_totalResults = None

    def extend_atom(self: BaseExtension, atom_feed: FeedGenerator) -> FeedGenerator:
        """
        Append every OpenSearch value that has been set to the Atom feed.

        Parameters
        ----------
        atom_feed : :class:`.FeedGenerator`
            The object handed to ``etree.SubElement`` as the parent of the
            new OpenSearch elements.

        Returns
        -------
        FeedGenerator
            The same object that was passed in.

        """
        # Emission order is itemsPerPage, totalResults, startIndex; values
        # that are still None produce no element at all.
        candidates = (
            ('{http://a9.com/-/spec/opensearch/1.1/}itemsPerPage', self.__opensearch_itemsPerPage),
            ('{http://a9.com/-/spec/opensearch/1.1/}totalResults', self.__opensearch_totalResults),
            ('{http://a9.com/-/spec/opensearch/1.1/}startIndex', self.__opensearch_startIndex),
        )
        for qualified_tag, value in candidates:
            if value is None:
                continue
            etree.SubElement(atom_feed, qualified_tag).text = value

        return atom_feed

    @staticmethod
    def extend_rss(rss_feed: FeedGenerator) -> FeedGenerator:
        """
        Leave the RSS feed untouched.

        Parameters
        ----------
        rss_feed
            The RSS feed object offered to this extension.

        Returns
        -------
        FeedGenerator
            The same object that was passed in, unmodified.

        """
        return rss_feed

    @staticmethod
    def extend_ns() -> Dict[str, str]:
        """
        Declare the XML namespace used by this extension.

        Returns
        -------
        dict
            A one-item mapping from the ``opensearch`` prefix to its
            namespace URI.

        """
        namespaces = {'opensearch': 'http://a9.com/-/spec/opensearch/1.1/'}
        return namespaces

    def totalResults(self: BaseExtension, text: str):
        """Store the totalResults value, converted with ``str()``."""
        self.__opensearch_totalResults = str(text)

    def startIndex(self: BaseExtension, text: str):
        """Store the startIndex value, converted with ``str()``."""
        self.__opensearch_startIndex = str(text)

    def itemsPerPage(self: BaseExtension, text: str):
        """Store the itemsPerPage value, converted with ``str()``."""
        self.__opensearch_itemsPerPage = str(text)


class ArxivExtension(BaseExtension):
    """Feedgen feed-level extension that only registers the arXiv namespace."""

    def __init__(self: BaseExtension) -> None:
        """Do nothing: this extension holds no state."""

    @staticmethod
    def extend_atom(atom_feed: FeedGenerator) -> FeedGenerator:
        """
        Leave the Atom feed untouched.

        Parameters
        ----------
        atom_feed
            The Atom feed object offered to this extension.

        Returns
        -------
        FeedGenerator
            The same object that was passed in, unmodified.

        """
        return atom_feed

    @staticmethod
    def extend_rss(rss_feed: FeedGenerator) -> FeedGenerator:
        """
        Leave the RSS feed untouched.

        Parameters
        ----------
        rss_feed
            The RSS feed object offered to this extension.

        Returns
        -------
        FeedGenerator
            The same object that was passed in, unmodified.

        """
        return rss_feed

    @staticmethod
    def extend_ns() -> Dict[str, str]:
        """
        Declare the XML namespace used by the arXiv elements.

        Returns
        -------
        dict
            A one-item mapping from the ``arxiv`` prefix to its namespace URI.

        """
        namespaces = {'arxiv': 'http://arxiv.org/schemas/atom'}
        return namespaces


class ArxivEntryExtension(BaseEntryExtension):
    """Extension of the Feedgen base class to allow us to add elements to the Atom output."""

    def __init__(self: BaseEntryExtension) -> None:
        """Initialize the member values to all be empty."""
        self.__arxiv_comment = None
        self.__arxiv_primary_category = None
        self.__arxiv_doi = None
        self.__arxiv_journal_ref = None
        self.__arxiv_authors = []

    def extend_atom(self: BaseEntryExtension, entry: FeedEntry) -> FeedEntry:
        """
        Add this extension's new elements to the Atom feed entry.

        Values that were never set (or are empty) produce no element.

        Parameters
        ----------
        entry
            The FeedEntry to modify; it is used as the parent passed to
            ``etree.SubElement``.

        Returns
        -------
        FeedEntry
            The modified entry.

        Raises
        ------
        KeyError
            If an author dictionary has no ``'name'`` key.

        """
        if self.__arxiv_comment:
            comment_element = etree.SubElement(entry, '{http://arxiv.org/schemas/atom}comment')
            comment_element.text = self.__arxiv_comment

        if self.__arxiv_primary_category:
            # The category is written as a ``term`` attribute, not as text.
            primary_category_element = etree.SubElement(entry, '{http://arxiv.org/schemas/atom}primary_category')
            primary_category_element.attrib['term'] = self.__arxiv_primary_category

        if self.__arxiv_journal_ref:
            journal_ref_element = etree.SubElement(entry, '{http://arxiv.org/schemas/atom}journal_ref')
            journal_ref_element.text = self.__arxiv_journal_ref

        dois = self.__arxiv_doi or []
        if isinstance(dois, str):
            # A bare string would otherwise be iterated character by
            # character, yielding one <doi> element per character.
            dois = [dois]
        for doi in dois:
            doi_element = etree.SubElement(entry, '{http://arxiv.org/schemas/atom}doi')
            doi_element.text = doi

        for author in self.__arxiv_authors:
            author_element = etree.SubElement(entry, 'author')
            name_element = etree.SubElement(author_element, 'name')
            name_element.text = author['name']

            # Affiliation is optional: a missing key or a None value means
            # "no affiliations" instead of an error.
            affiliations = author.get('affiliation') or []
            if isinstance(affiliations, str):
                # Same character-iteration hazard as for DOIs above.
                affiliations = [affiliations]
            for affiliation in affiliations:
                affiliation_element = etree.SubElement(author_element, '{http://arxiv.org/schemas/atom}affiliation')
                affiliation_element.text = affiliation

        return entry

    @staticmethod
    def extend_rss(entry: FeedEntry) -> FeedEntry:
        """
        Add this extension's new elements to the RSS feed entry.

        Nothing is added for RSS; the entry is returned as received.

        Parameters
        ----------
        entry
            The FeedEntry to modify.

        Returns
        -------
        FeedEntry
            The unmodified entry.

        """
        return entry

    def comment(self: BaseEntryExtension, text: str) -> None:
        """
        Assign the comment value to this entry.

        Parameters
        ----------
        text
            The new comment text.

        """
        self.__arxiv_comment = text

    def primary_category(self: BaseEntryExtension, text: str) -> None:
        """
        Assign the primary_category value to this entry.

        Parameters
        ----------
        text
            The new primary_category name.

        """
        self.__arxiv_primary_category = text

    def journal_ref(self: BaseEntryExtension, text: str) -> None:
        """
        Assign the journal_ref value to this entry.

        Parameters
        ----------
        text
            The new journal_ref value.

        """
        self.__arxiv_journal_ref = text

    def doi(self: BaseEntryExtension, list: Dict[str, str]) -> None:
        """
        Assign the doi value to this entry.

        Parameters
        ----------
        list
            The new collection of DOIs. ``extend_atom`` iterates over it and
            writes one element per item (for a dict, that means its keys); a
            single string is treated as one DOI. The parameter name shadows
            the ``list`` builtin but is kept so keyword callers keep working.

        """
        self.__arxiv_doi = list

    def author(self: BaseEntryExtension, data: Dict[str, Any]) -> None:
        """
        Add an author to this entry.

        Parameters
        ----------
        data
            A dictionary with a required ``'name'`` entry and an optional
            ``'affiliation'`` entry (an iterable of strings, or one string).

        """
        self.__arxiv_authors.append(data)
2 changes: 1 addition & 1 deletion search/routes/api/classic.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def query() -> Response:
# requested = request.accept_mimetypes.best_match([JSON, ATOM_XML])
# if requested == ATOM_XML:
# return serialize.as_atom(data), status, headers
response_data = serialize.as_json(data['results'], query=data['query'])
response_data = serialize.as_atom(data['results'], query=data['query'])
return response_data, status_code, headers

@blueprint.route('<arxiv:paper_id>v<string:version>', methods=['GET'])
Expand Down
Loading

0 comments on commit b1848d4

Please sign in to comment.