Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Atom/XML Serializer #239

Merged
merged 5 commits into from
Jun 11, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ dataclasses = "==0.4"
docutils = "==0.14"
elasticsearch = "==6.2.0"
elasticsearch-dsl = "==6.3.1"
feedgen = "==0.7.0"
flask = "==1.0.2"
"flask-s3" = "==0.3.3"
idna = "==2.6"
Expand Down
13 changes: 10 additions & 3 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions search/controllers/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,15 @@ def classic_query(params: MultiDict) -> Tuple[Dict[str, Any], int, Dict[str, Any
params['query'] = raw_query
del params['search_query']

params.add('include', 'abstract')
params.add('include', 'submitted_date')
params.add('include', 'updated_date')
params.add('include', 'comments')
params.add('include', 'journal_ref')
params.add('include', 'doi')
params.add('include', 'primary_classification')
params.add('include', 'secondary_classification')
params.add('include', 'authors')
# pass to normal search, which will handle parsing
data, _, _ = search(params) # type: ignore

Expand Down
2 changes: 2 additions & 0 deletions search/process/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ def _transformAuthor(author: dict) -> Optional[Dict]:
author['initials'] = " ".join([pt[0] for pt in author['first_name'].split() if pt])
name_parts = author['first_name'].split() + author['last_name'].split()
author['full_name_initialized'] = ' '.join([part[0] for part in name_parts[:-1]] + [name_parts[-1]])
# TODO: add handling for arxiv:affiliation

return author


Expand Down
280 changes: 280 additions & 0 deletions search/routes/api/atom_extensions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,280 @@
"""Classes derived from the Feedgen extension classes."""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a line or two of context about Feedgen? E.g. at a high level, how does it work?


from typing import Any, Dict
from feedgen.ext.base import BaseEntryExtension, BaseExtension
from feedgen.entry import FeedEntry
from feedgen.feed import FeedGenerator
from lxml import etree


class OpenSearchExtension(BaseExtension):
    """
    Feedgen feed extension that writes OpenSearch paging metadata.

    Values recorded with :meth:`totalResults`, :meth:`startIndex` and
    :meth:`itemsPerPage` are emitted as child elements in the OpenSearch
    namespace when :meth:`extend_atom` runs. Values that were never set are
    omitted from the output.
    """

    # Single definition of the namespace URI, shared by the element tags
    # built in ``extend_atom`` and the prefix map returned by ``extend_ns``.
    _OPENSEARCH_NS = 'http://a9.com/-/spec/opensearch/1.1/'

    def __init__(self) -> None:
        """Initialize extension parameters; ``None`` means "not set"."""
        self.__opensearch_totalResults = None
        self.__opensearch_startIndex = None
        self.__opensearch_itemsPerPage = None

    def extend_atom(self, atom_feed: FeedGenerator) -> FeedGenerator:
        """
        Append the OpenSearch elements that have been set to the Atom feed.

        Parameters
        ----------
        atom_feed
            Parent node for the new elements. It is handed straight to
            ``etree.SubElement``, so despite the annotation it is presumably
            the lxml root element of the feed -- TODO confirm against
            Feedgen's ``BaseExtension.extend_atom`` contract.

        Returns
        -------
        FeedGenerator
            The same ``atom_feed`` object, after the elements were appended.

        """
        # Emission order is deliberate and matches the previous behaviour:
        # itemsPerPage, then totalResults, then startIndex.
        fields = (
            ('itemsPerPage', self.__opensearch_itemsPerPage),
            ('totalResults', self.__opensearch_totalResults),
            ('startIndex', self.__opensearch_startIndex),
        )
        for tag, value in fields:
            if value is None:
                continue
            # Clark notation ("{namespace}tag") places the element in the
            # OpenSearch namespace.
            element = etree.SubElement(
                atom_feed, '{' + self._OPENSEARCH_NS + '}' + tag
            )
            element.text = value

        return atom_feed

    @staticmethod
    def extend_rss(rss_feed: FeedGenerator) -> FeedGenerator:
        """
        Return the RSS feed untouched; no OpenSearch elements are added.

        Parameters
        ----------
        rss_feed
            The RSS feed object passed in by Feedgen.

        Returns
        -------
        FeedGenerator
            The provided ``rss_feed``, unmodified.

        """
        return rss_feed

    @staticmethod
    def extend_ns() -> Dict[str, str]:
        """
        Declare the namespace used by this extension.

        Returns
        -------
        dict
            Mapping of the ``opensearch`` prefix to the OpenSearch 1.1
            namespace URI.

        """
        return {'opensearch': OpenSearchExtension._OPENSEARCH_NS}

    def totalResults(self, text: str) -> None:
        """
        Set the totalResults parameter.

        Parameters
        ----------
        text
            The value to emit; it is coerced with ``str()`` before storing.

        """
        self.__opensearch_totalResults = str(text)

    def startIndex(self, text: str) -> None:
        """
        Set the startIndex parameter.

        Parameters
        ----------
        text
            The value to emit; it is coerced with ``str()`` before storing.

        """
        self.__opensearch_startIndex = str(text)

    def itemsPerPage(self, text: str) -> None:
        """
        Set the itemsPerPage parameter.

        Parameters
        ----------
        text
            The value to emit; it is coerced with ``str()`` before storing.

        """
        self.__opensearch_itemsPerPage = str(text)


class ArxivExtension(BaseExtension):
    """
    Feedgen feed extension that declares the "arxiv" XML namespace.

    It adds no elements of its own; ``extend_atom`` and ``extend_rss`` hand
    the feed back unchanged, and ``extend_ns`` supplies the prefix mapping.
    """

    def __init__(self) -> None:
        """Do nothing; this extension keeps no state."""

    @staticmethod
    def extend_atom(atom_feed: FeedGenerator) -> FeedGenerator:
        """
        Return the Atom feed untouched; no feed-level elements are added.

        Parameters
        ----------
        atom_feed
            The Atom feed object passed in by Feedgen.

        Returns
        -------
        FeedGenerator
            The provided ``atom_feed``, unmodified.

        """
        return atom_feed

    @staticmethod
    def extend_rss(rss_feed: FeedGenerator) -> FeedGenerator:
        """
        Return the RSS feed untouched; no feed-level elements are added.

        Parameters
        ----------
        rss_feed
            The RSS feed object passed in by Feedgen.

        Returns
        -------
        FeedGenerator
            The provided ``rss_feed``, unmodified.

        """
        return rss_feed

    @staticmethod
    def extend_ns() -> Dict[str, str]:
        """
        Declare the namespace used by the arXiv extensions.

        Returns
        -------
        dict
            Mapping of the ``arxiv`` prefix to the arXiv Atom schema URI.

        """
        return {'arxiv': 'http://arxiv.org/schemas/atom'}


class ArxivEntryExtension(BaseEntryExtension):
    """
    Feedgen entry extension that adds arXiv elements to Atom entries.

    Values recorded with :meth:`comment`, :meth:`primary_category`,
    :meth:`journal_ref`, :meth:`doi` and :meth:`author` are written as child
    elements of the entry when :meth:`extend_atom` runs. Unset or empty
    values are omitted.
    """

    # Namespace URI for every ``arxiv:*`` element emitted by this class.
    _ARXIV_NS = 'http://arxiv.org/schemas/atom'

    def __init__(self) -> None:
        """Initialize the member values to all be empty."""
        self.__arxiv_comment = None
        self.__arxiv_primary_category = None
        self.__arxiv_doi = None
        self.__arxiv_journal_ref = None
        self.__arxiv_authors = []

    @classmethod
    def _qualified(cls, name: str) -> str:
        """Return ``name`` in Clark notation within the arXiv namespace."""
        return '{' + cls._ARXIV_NS + '}' + name

    def extend_atom(self, entry: FeedEntry) -> FeedEntry:
        """
        Add this extension's new elements to the Atom feed entry.

        Parameters
        ----------
        entry
            Parent node for the new elements. It is handed straight to
            ``etree.SubElement``, so despite the annotation it is presumably
            the lxml element of the entry -- TODO confirm against Feedgen's
            ``BaseEntryExtension.extend_atom`` contract.

        Returns
        -------
        FeedEntry
            The same ``entry`` object, after the elements were appended.

        Raises
        ------
        KeyError
            If an author dictionary has no ``'name'`` key.

        """
        if self.__arxiv_comment:
            comment_element = etree.SubElement(
                entry, self._qualified('comment')
            )
            comment_element.text = self.__arxiv_comment

        if self.__arxiv_primary_category:
            # The category is carried in a ``term`` attribute, not as text.
            category_element = etree.SubElement(
                entry, self._qualified('primary_category')
            )
            category_element.attrib['term'] = self.__arxiv_primary_category

        if self.__arxiv_journal_ref:
            journal_ref_element = etree.SubElement(
                entry, self._qualified('journal_ref')
            )
            journal_ref_element.text = self.__arxiv_journal_ref

        # ``or ()`` covers the initial ``None`` as well as an empty value.
        for doi in self.__arxiv_doi or ():
            doi_element = etree.SubElement(entry, self._qualified('doi'))
            doi_element.text = doi

        for author in self.__arxiv_authors:
            # ``author`` and ``name`` are created without a namespace
            # prefix, unlike the arXiv-specific elements.
            author_element = etree.SubElement(entry, 'author')
            name_element = etree.SubElement(author_element, 'name')
            name_element.text = author['name']
            # Affiliation data is optional: a missing key or a ``None``
            # value yields an author with no affiliation elements instead
            # of raising ``KeyError``.
            for affiliation in author.get('affiliation') or ():
                affiliation_element = etree.SubElement(
                    author_element, self._qualified('affiliation')
                )
                affiliation_element.text = affiliation

        return entry

    @staticmethod
    def extend_rss(entry: FeedEntry) -> FeedEntry:
        """
        Return the RSS entry untouched; no arXiv elements are added to RSS.

        Parameters
        ----------
        entry
            The RSS entry object passed in by Feedgen.

        Returns
        -------
        FeedEntry
            The provided ``entry``, unmodified.

        """
        return entry

    def comment(self, text: str) -> None:
        """
        Assign the comment value to this entry.

        Parameters
        ----------
        text
            The new comment text.

        """
        self.__arxiv_comment = text

    def primary_category(self, text: str) -> None:
        """
        Assign the primary_category value to this entry.

        Parameters
        ----------
        text
            The new primary_category name.

        """
        self.__arxiv_primary_category = text

    def journal_ref(self, text: str) -> None:
        """
        Assign the journal_ref value to this entry.

        Parameters
        ----------
        text
            The new journal_ref value.

        """
        self.__arxiv_journal_ref = text

    def doi(self, list: Dict[str, str]) -> None:
        """
        Assign the doi value to this entry.

        Parameters
        ----------
        list
            The new collection of DOI assignments. ``extend_atom`` iterates
            it and writes each item as the text of one ``doi`` element.

        """
        # NOTE(review): the parameter shadows the builtin ``list`` and is
        # annotated as a dict although it is consumed by plain iteration
        # (which yields only the keys of a dict). Name and annotation are
        # kept so existing callers keep working -- confirm the intended
        # type with the serializer that calls this.
        self.__arxiv_doi = list

    def author(self, data: Dict[str, Any]) -> None:
        """
        Add an author to this entry.

        Parameters
        ----------
        data
            A dictionary with the author's ``'name'`` and, optionally, an
            ``'affiliation'`` iterable of affiliation strings.

        """
        self.__arxiv_authors.append(data)
2 changes: 1 addition & 1 deletion search/routes/api/classic.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def query() -> Response:
# requested = request.accept_mimetypes.best_match([JSON, ATOM_XML])
# if requested == ATOM_XML:
# return serialize.as_atom(data), status, headers
response_data = serialize.as_json(data['results'], query=data['query'])
response_data = serialize.as_atom(data['results'], query=data['query'])
return response_data, status_code, headers

@blueprint.route('<arxiv:paper_id>v<string:version>', methods=['GET'])
Expand Down
Loading