From 473909cf7321314dab7fb79dbc6e3a042468463f Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Thu, 19 Apr 2018 14:49:10 -0400
Subject: [PATCH 01/77] ARXIVNG-281 moved Kinesis BaseConsumer to arxiv-base

---
 Pipfile                                       |   3 +-
 Pipfile.lock                                  |   9 +-
 search/agent/__init__.py                      |  37 +-
 search/agent/base.py                          | 352 ------------------
 search/agent/consumer.py                      |   5 +-
 search/agent/tests/test_base_consumer.py      | 166 ---------
 .../tests/{tests.py => test_integration.py}   |   2 +-
 7 files changed, 11 insertions(+), 563 deletions(-)
 delete mode 100644 search/agent/base.py
 delete mode 100644 search/agent/tests/test_base_consumer.py
 rename search/agent/tests/{tests.py => test_integration.py} (99%)

diff --git a/Pipfile b/Pipfile
index 7f4fd3f5..12313eb8 100644
--- a/Pipfile
+++ b/Pipfile
@@ -7,7 +7,7 @@ name = "pypi"
 
 [packages]
 
-arxiv-base = "==0.5.2"
+arxiv-base = "==0.6"
 boto = "==2.48.0"
 "boto3" = "==1.6.6"
 botocore = "==1.9.6"
@@ -50,6 +50,7 @@ typed-ast = "==1.1.0"
 werkzeug = "==0.13"
 wtforms = "==2.1"
 bleach = "*"
+"17e504d" = {path = "./../arxiv-base-ui"}
 
 
 [dev-packages]
diff --git a/Pipfile.lock b/Pipfile.lock
index e3b87842..7414bbb3 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "f22ed549999fe9d8243b6323183835e709216454c28c54fc5248560c36379cf3"
+            "sha256": "835437127f096ee2dfbac653520248f8a84fd703f3652183f07e82b6f49612c4"
         },
         "host-environment-markers": {
             "implementation_name": "cpython",
@@ -27,11 +27,8 @@
         ]
     },
     "default": {
-        "arxiv-base": {
-            "hashes": [
-                "sha256:73afac321bd5a358ea63ba3dddeaa63834a22f44afb8b4872c73aa1a857c7a88"
-            ],
-            "version": "==0.5.2"
+        "17e504d": {
+            "path": "./../arxiv-base-ui"
         },
         "bleach": {
             "hashes": [
diff --git a/search/agent/__init__.py b/search/agent/__init__.py
index ad7cef49..2f81e130 100644
--- a/search/agent/__init__.py
+++ b/search/agent/__init__.py
@@ -9,17 +9,11 @@
 and becomes available for discovery via :mod:`search.routes.ui`.
 """
 from typing import Optional
-from datetime import datetime
-import warnings
 
 from flask import current_app as app
 
-from arxiv.base import logging
+from arxiv.base import agent
 from .consumer import MetadataRecordProcessor, DocumentFailed, IndexingFailed
-from .base import CheckpointManager
-
-logger = logging.getLogger(__name__)
-logger.propagate = False
 
 
 def process_stream(duration: Optional[int] = None) -> None:
@@ -35,30 +29,5 @@ def process_stream(duration: Optional[int] = None) -> None:
     """
     # We use the Flask application instance for configuration, and to manage
     # integrations with metadata service, search index.
-    with warnings.catch_warnings():     # boto3 is notoriously annoying.
-        warnings.simplefilter("ignore")
-        start_at = app.config.get('KINESIS_START_AT')
-        start_type = app.config.get('KINESIS_START_TYPE')
-        if not start_type:
-            start_type = 'AT_TIMESTAMP'
-        if start_type == 'AT_TIMESTAMP' and not start_at:
-            start_at = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
-
-        processor = MetadataRecordProcessor(
-            app.config['KINESIS_STREAM'],
-            app.config['KINESIS_SHARD_ID'],
-            app.config['AWS_ACCESS_KEY_ID'],
-            app.config['AWS_SECRET_ACCESS_KEY'],
-            app.config['AWS_REGION'],
-            CheckpointManager(
-                app.config['KINESIS_CHECKPOINT_VOLUME'],
-                app.config['KINESIS_STREAM'],
-                app.config['KINESIS_SHARD_ID'],
-            ),
-            endpoint=app.config.get('KINESIS_ENDPOINT', None),
-            verify=app.config.get('KINESIS_VERIFY', 'true') == 'true',
-            duration=duration,
-            start_type=start_type,
-            start_at=start_at
-        )
-        processor.go()
+    agent.process_stream(MetadataRecordProcessor, app.config,
+                         duration=duration)
diff --git a/search/agent/base.py b/search/agent/base.py
deleted file mode 100644
index acbd5585..00000000
--- a/search/agent/base.py
+++ /dev/null
@@ -1,352 +0,0 @@
-"""
-Provides a base class for Kinesis record handling.
-
-.. _todo: This should move to arXiv-base, per ARXIVNG-281.
-"""
-
-import time
-import json
-from datetime import datetime, timedelta
-import os
-from typing import Any, Optional, Tuple, Generator, Callable, Dict, Union
-from contextlib import contextmanager
-import signal
-
-import boto3
-from botocore.exceptions import WaiterError, NoCredentialsError, \
-    PartialCredentialsError, BotoCoreError, ClientError
-
-from arxiv.base import logging
-logger = logging.getLogger(__name__)
-logger.propagate = False
-
-NOW = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
-
-
-class CheckpointError(RuntimeError):
-    """Checkpointing failed."""
-
-
-class StreamNotAvailable(RuntimeError):
-    """Could not find or connect to the stream."""
-
-
-class KinesisRequestFailed(RuntimeError):
-    """Raised when a Kinesis request failed permanently."""
-
-
-class StopProcessing(RuntimeError):
-    """Gracefully stopped processing upon unrecoverable error."""
-
-
-class ConfigurationError(RuntimeError):
-    """There was a problem with the configuration."""
-
-
-def retry(retries: int = 5, wait: int = 5) -> Callable:
-    """
-    Decorator factory for retrying Kinesis calls.
-
-    Parameters
-    ----------
-    retries : int
-        Number of times to retry before failing.
-    wait : int
-        Number of seconds to wait between retries.
-
-    Returns
-    -------
-    function
-        A decorator that retries the decorated func ``retries`` times before
-        raising :class:`.KinesisRequestFailed`.
-
-    """
-    __retries = retries
-
-    def decorator(func: Callable) -> Callable:
-        """Retry the decorated func on ClientErrors up to ``retries`` times."""
-        _retries = __retries
-
-        def inner(*args, **kwargs) -> Any:  # type: ignore
-            retries = _retries
-            while retries > 0:
-                try:
-                    return func(*args, **kwargs)
-                except ClientError as e:
-                    code = e.response['Error']['Code']
-                    logger.error('Caught ClientError %s, retrying', code)
-                    time.sleep(wait)
-                    retries -= 1
-            raise KinesisRequestFailed('Max retries; last code: {code}')
-        return inner
-    return decorator
-
-
-class CheckpointManager(object):
-    """Provides on-disk loading and updating of consumer checkpoints."""
-
-    def __init__(self, base_path: str, stream_name: str, shard_id: str) \
-            -> None:
-        """Load or create a new file for checkpointing."""
-        if not os.path.exists(base_path):
-            raise ValueError(f'Path does not exist: {base_path}')
-        self.file_path = os.path.join(base_path,
-                                      f'{stream_name}__{shard_id}.json')
-        if not os.path.exists(self.file_path):
-            try:
-                with open(self.file_path, 'w') as f:
-                    f.write('')
-            except Exception as e:   # The containing path doesn't exist.
-                raise ValueError(f'Could not use {self.file_path}') from e
-
-        with open(self.file_path) as f:
-            position = f.read()
-        self.position = position if position else None
-
-    def checkpoint(self, position: str) -> None:
-        """Checkpoint at ``position``."""
-        try:
-            with open(self.file_path, 'w') as f:
-                f.write(position)
-            self.position = position
-        except Exception as e:
-            raise CheckpointError('Could not checkpoint') from e
-
-
-class BaseConsumer(object):
-    """
-    Kinesis stream consumer.
-
-    Consumes a single shard from a single stream, and checkpoints on disk
-    (to reduce external dependencies).
-    """
-
-    def __init__(self, stream_name: str = '', shard_id: str = '',
-                 access_key: str = '', secret_key: str = '', region: str = '',
-                 checkpointer: Optional[CheckpointManager] = None,
-                 back_off: int = 5, batch_size: int = 50,
-                 endpoint: Optional[str] = None, verify: bool = True,
-                 duration: Optional[int] = None,
-                 start_type: str = 'AT_TIMESTAMP',
-                 start_at: str = NOW) -> None:
-        """Initialize a new stream consumer."""
-        logger.info(f'New consumer for {stream_name} ({shard_id})')
-        self.stream_name = stream_name
-        self.shard_id = shard_id
-        self.checkpointer = checkpointer
-        if self.checkpointer:
-            self.position = self.checkpointer.position
-        else:
-            self.position = None
-        self.duration = duration
-        self.start_time = None
-        self.back_off = back_off
-        self.batch_size = batch_size
-        self.sleep_time = 5
-        self.start_at = start_at
-        self.start_type = start_type
-        logger.info(f'Got start_type={start_type} and start_at={start_at}')
-
-        if not self.stream_name or not self.shard_id:
-            logger.info(
-                'No stream indicated; making no attempt to connect to Kinesis'
-            )
-            return
-
-        logger.info(f'Getting a new connection to Kinesis at {endpoint}'
-                    f' in region {region}, with SSL verification={verify}')
-        self.client = boto3.client('kinesis',
-                                   aws_access_key_id=access_key,
-                                   aws_secret_access_key=secret_key,
-                                   endpoint_url=endpoint,
-                                   verify=verify,
-                                   region_name=region)
-
-        logger.info(f'Waiting for {self.stream_name} to be available')
-        try:
-            self.wait_for_stream()
-        except (KinesisRequestFailed, StreamNotAvailable):
-            logger.info('Could not connect to stream; attempting to create')
-            self.client.create_stream(
-                StreamName=self.stream_name,
-                ShardCount=1
-            )
-            logger.info(f'Created; waiting for {self.stream_name} again')
-            self.wait_for_stream()
-
-        # Intercept SIGINT and SIGTERM so that we can checkpoint before exit.
-        self.exit = False
-        signal.signal(signal.SIGINT, self.stop)
-        signal.signal(signal.SIGTERM, self.stop)
-        logger.info('Ready to start')
-
-    def stop(self, signal: int, frame: Any) -> None:
-        """Set exit flag for a graceful stop."""
-        logger.error(f'Received signal {signal}')
-        self._checkpoint()
-        logger.error('Done')
-        raise StopProcessing(f'Received signal {signal}')
-
-    @retry(5, 10)
-    def wait_for_stream(self) -> None:
-        """
-        Wait for the stream to become available.
-
-        If the stream becomes available, returns ``None``. Otherwise, raises
-        a :class:`.StreamNotAvailable` exception.
-
-        Raises
-        ------
-        :class:`.StreamNotAvailable`
-            Raised when the stream could not be reached.
-
-        """
-        waiter = self.client.get_waiter('stream_exists')
-        try:
-            logger.error(f'Waiting for stream {self.stream_name}')
-            waiter.wait(
-                StreamName=self.stream_name,
-                Limit=1,
-                ExclusiveStartShardId=self.shard_id
-            )
-        except WaiterError as e:
-            logger.error('Failed to get stream while waiting')
-            raise StreamNotAvailable('Could not connect to stream') from e
-        except (PartialCredentialsError, NoCredentialsError) as e:
-            logger.error('Credentials missing or incomplete: %s', e.msg)
-            raise ConfigurationError('Credentials missing') from e
-
-    def _get_iterator(self) -> str:
-        """
-        Get a new shard iterator.
-
-        If our position is set, we will start with the record immediately after
-        that position. Otherwise, we start at ``start_at`` timestamp.
-
-        Returns
-        -------
-        str
-            The sequence ID of the record on which to start.
-
-        """
-        params: Dict[str, Any] = dict(
-            StreamName=self.stream_name,
-            ShardId=self.shard_id
-        )
-        if self.position:
-            params.update(dict(
-                ShardIteratorType='AFTER_SEQUENCE_NUMBER',
-                StartingSequenceNumber=self.position
-            ))
-        elif self.start_type == 'AT_TIMESTAMP' and self.start_at:
-            start_at = datetime.strptime(self.start_at, '%Y-%m-%dT%H:%M:%S')
-            params.update(dict(
-                ShardIteratorType='AT_TIMESTAMP',
-                Timestamp=(
-                    start_at - datetime.utcfromtimestamp(0)
-                ).total_seconds()
-            ))
-        elif self.start_type == 'TRIM_HORIZON':
-            params.update(dict(ShardIteratorType='TRIM_HORIZON'))
-        try:
-            it: str = self.client.get_shard_iterator(**params)['ShardIterator']
-            return it
-        except self.client.exceptions.InvalidArgumentException as e:
-            logger.info('Got InvalidArgumentException: %s', str(e))
-            # Iterator may not have come from this stream/shard.
-            if self.position is not None:
-                self.position = None
-                return self._get_iterator()
-        raise KinesisRequestFailed('Could not get shard iterator')
-
-    def _checkpoint(self) -> None:
-        """
-        Checkpoint at the current position.
-
-        The current position is the sequence number of the last record that was
-        successfully processed.
-        """
-        if self.position is not None and self.checkpointer:
-            self.checkpointer.checkpoint(self.position)
-            logger.debug(f'Set checkpoint at {self.position}')
-
-    @retry(retries=10, wait=5)
-    def get_records(self, iterator: str, limit: int) -> Tuple[str, dict]:
-        """Get the next batch of ``limit`` or fewer records."""
-        logger.debug(f'Get more records from {iterator}, limit {limit}')
-        response = self.client.get_records(ShardIterator=iterator,
-                                           Limit=limit)
-        iterator = response['NextShardIterator']
-        return iterator, response
-
-    def _check_timeout(self) -> None:
-        """If a processing duration is set, exit if duration is exceeded."""
-        if not self.start_time or not self.duration:
-            return
-        running_for = time.time() - self.start_time
-        if running_for > self.duration:
-            logger.info(f'Ran for {running_for} seconds; exiting')
-            self._checkpoint()
-            raise StopProcessing(f'Ran for {running_for} seconds; exiting')
-
-    def process_records(self, start: str) -> Tuple[str, int]:
-        """Retrieve and process records starting at ``start``."""
-        logger.debug(f'Get more records, starting at {start}')
-        processed = 0
-        try:
-            time.sleep(self.sleep_time)   # Don't get carried away.
-            next_start, response = self.get_records(start, self.batch_size)
-        except Exception as e:
-            self._checkpoint()
-            raise StopProcessing('Unhandled exception: %s' % str(e)) from e
-
-        logger.debug('Got %i records', len(response['Records']))
-        for record in response['Records']:
-            self._check_timeout()
-
-            # It is possible that Kinesis will replay the same message several
-            # times, especially at the end of the stream. There's no point in
-            # replaying the message, so we'll continue on.
-            if record['SequenceNumber'] == self.position:
-                continue
-
-            self.process_record(record)
-            processed += 1
-
-            # Setting the position means that we have successfully
-            # processed this record.
-            if record['SequenceNumber']:    # Make sure it's set.
-                self.position = record['SequenceNumber']
-                logger.debug(f'Updated position to {self.position}')
-        logger.debug(f'Next start is {next_start}')
-        return next_start, processed
-
-    def go(self) -> None:
-        """Main processing routine."""
-        self.start_time = time.time()
-        logger.info(f'Starting processing from position {self.position}'
-                    f' on stream {self.stream_name} and shard {self.shard_id}')
-
-        start = self._get_iterator()
-        while True:
-            start, processed = self.process_records(start)
-            if processed > 0:
-                self._checkpoint()  # Checkpoint after every batch.
-            if start is None:     # Shard is closed.
-                logger.error('Shard closed unexpectedly; no new iterator')
-                self._checkpoint()
-                raise StopProcessing('Could not get a new iterator')
-            self._check_timeout()
-
-    def process_record(self, record: dict) -> None:
-        """
-        Process a single record from the stream.
-
-        Parameters
-        ----------
-        record : dict
-
-        """
-        logger.info(f'Processing record {record["SequenceNumber"]}')
-        logger.debug(f'Process record {record}')
-        # raise NotImplementedError('Should be implemented by a subclass')
diff --git a/search/agent/consumer.py b/search/agent/consumer.py
index c83a569d..5c0bc60e 100644
--- a/search/agent/consumer.py
+++ b/search/agent/consumer.py
@@ -1,14 +1,13 @@
 """Provides a record processor for MetadataIsAvailable notifications."""
-from typing import Dict, List
 
 import json
 import os
-from typing import List, Any, Optional
+from typing import List, Any, Optional, Dict
 from arxiv.base import logging
 from search.services import metadata, index
 from search.process import transform
 from search.domain import DocMeta, Document, asdict
-from .base import BaseConsumer
+from arxiv.base.agent import BaseConsumer
 
 logger = logging.getLogger(__name__)
 logger.propagate = False
diff --git a/search/agent/tests/test_base_consumer.py b/search/agent/tests/test_base_consumer.py
deleted file mode 100644
index ede647e5..00000000
--- a/search/agent/tests/test_base_consumer.py
+++ /dev/null
@@ -1,166 +0,0 @@
-"""Tests for :class:`.BaseConsumer`."""
-
-from unittest import TestCase, mock
-from botocore.exceptions import BotoCoreError, WaiterError, ClientError
-
-from search.agent.base import BaseConsumer, StreamNotAvailable, StopProcessing
-
-
-class TestBaseConsumer(TestCase):
-    """Test :class:`.BaseConsumer` behavior and public methods."""
-
-    def setUp(self):
-        self.checkpointer = mock.MagicMock()
-        self.checkpointer.position = None
-
-    @mock.patch('boto3.client')
-    def test_init(self, mock_client_factory):
-        """On init, consumer should wait for stream to be available."""
-        mock_client = mock.MagicMock()
-        mock_waiter = mock.MagicMock()
-        mock_client.get_waiter.return_value = mock_waiter
-        mock_client_factory.return_value = mock_client
-
-        try:
-            BaseConsumer('foo', '1', 'a1b2c3d4', 'qwertyuiop', 'us-east-1',
-                         self.checkpointer)
-        except Exception as e:
-            self.fail('If the waiter returns without an exception, no'
-                      ' exception should be raised.')
-        self.assertEqual(mock_waiter.wait.call_count, 1,
-                         "A boto3 waiter should be used")
-
-    @mock.patch('boto3.client')
-    def test_init_stream_not_available(self, mock_client_factory):
-        """If the stream is not available, should raise an exception."""
-        mock_client = mock.MagicMock()
-        mock_waiter = mock.MagicMock()
-
-        def raise_waiter_error(*a, **k):
-            raise WaiterError('', {}, {})
-
-        mock_waiter.wait.side_effect = raise_waiter_error
-        mock_client.get_waiter.return_value = mock_waiter
-        mock_client_factory.return_value = mock_client
-        with self.assertRaises(StreamNotAvailable):
-            BaseConsumer('foo', '1', 'a1b2c3d4', 'qwertyuiop', 'us-east-1',
-                         self.checkpointer)
-
-    @mock.patch('boto3.client')
-    def test_iteration(self, mock_client_factory):
-        """Test iteration behavior."""
-        mock_client = mock.MagicMock()
-        mock_client_factory.return_value = mock_client
-        mock_client.get_records.return_value = {
-            'Records': [
-                {'SequenceNumber': str(i)} for i in range(10)
-            ],
-            'NextShardIterator': '10'
-        }
-        consumer = BaseConsumer('foo', '1', 'a1b2c3d4', 'qwertyuiop',
-                                'us-east-1', self.checkpointer)
-        next_start, processed = consumer.process_records('0')
-        self.assertGreater(mock_client.get_records.call_count, 0)
-        self.assertEqual(processed, 10)
-        self.assertEqual(next_start, '10', "Should return NextShardIterator")
-
-    @mock.patch('boto3.client')
-    def test_process_records_until_shard_closes(self, mock_client_factory):
-        """Should call GetRecords until no next iterator is available."""
-        mock_client = mock.MagicMock()
-        mock_client_factory.return_value = mock_client
-        mock_client.get_shard_iterator.return_value = {'ShardIterator': '1'}
-
-        def get_records(**kwargs):
-            start = int(kwargs['ShardIterator'])
-            end = start + int(kwargs['Limit'])
-            if start > 500:
-                return {'Records': [], 'NextShardIterator': None}
-            return {
-                'Records': [
-                    {'SequenceNumber': str(i)} for i in range(start, end)
-                ],
-                'NextShardIterator': str(end + 1)
-            }
-
-        mock_client.get_records.side_effect = get_records
-
-        batch_size = 50
-        consumer = BaseConsumer('foo', '1', 'a1b2c3d4', 'qwertyuiop',
-                                'us-east-1', self.checkpointer,
-                                batch_size=batch_size)
-        with self.assertRaises(StopProcessing):
-            consumer.go()
-        self.assertEqual(mock_client.get_records.call_count,
-                         (500/batch_size) + 1,
-                         "Should call Kinesis GetRecords until no iterator"
-                         " is returned.")
-
-    @mock.patch('boto3.client')
-    def test_process_records_with_clienterror(self, mock_client_factory):
-        """Should try to checkpoint before exiting."""
-        mock_client = mock.MagicMock()
-        mock_client_factory.return_value = mock_client
-        mock_client.get_shard_iterator.return_value = {'ShardIterator': '1'}
-
-        def raise_client_error(*args, **kwargs):
-            raise ClientError({'Error': {'Code': 'foo'}}, {})
-
-        mock_client.get_records.side_effect = raise_client_error
-
-        batch_size = 50
-        consumer = BaseConsumer('foo', '1', 'a1b2c3d4', 'qwertyuiop',
-                                'us-east-1', self.checkpointer,
-                                batch_size=batch_size)
-        consumer.position = 'fooposition'
-        try:
-            consumer.go()
-        except Exception:
-            pass
-        self.assertEqual(self.checkpointer.checkpoint.call_count, 1)
-
-    @mock.patch('boto3.client')
-    def test_start_from_timestamp(self, mock_client_factory):
-        """Consumer is initialized with start_type 'AT_TIMESTAMP'."""
-        mock_client = mock.MagicMock()
-        mock_client_factory.return_value = mock_client
-        mock_client.get_shard_iterator.return_value = {'ShardIterator': '1'}
-
-        consumer = BaseConsumer('foo', '1', 'a1b2c3d4', 'qwertyuiop',
-                                'us-east-1', self.checkpointer,
-                                start_type='AT_TIMESTAMP')
-        consumer._get_iterator()
-        args, kwargs = mock_client.get_shard_iterator.call_args
-        self.assertEqual(kwargs['ShardIteratorType'], 'AT_TIMESTAMP')
-        self.assertIn('Timestamp', kwargs)
-
-    @mock.patch('boto3.client')
-    def test_start_from_position(self, mock_client_factory):
-        """Consumer is initialized with start_type 'AT_TIMESTAMP'."""
-        mock_client = mock.MagicMock()
-        mock_client_factory.return_value = mock_client
-        mock_client.get_shard_iterator.return_value = {'ShardIterator': '1'}
-
-        consumer = BaseConsumer('foo', '1', 'a1b2c3d4', 'qwertyuiop',
-                                'us-east-1', self.checkpointer,
-                                start_type='AT_TIMESTAMP')
-        consumer.position = 'fooposition'
-        consumer._get_iterator()
-        args, kwargs = mock_client.get_shard_iterator.call_args
-        self.assertEqual(kwargs['ShardIteratorType'], 'AFTER_SEQUENCE_NUMBER')
-        self.assertEqual(kwargs['StartingSequenceNumber'], 'fooposition')
-
-    @mock.patch('boto3.client')
-    def test_start_from_trim_horizon(self, mock_client_factory):
-        """Consumer is initialized with start_type 'AT_TIMESTAMP'."""
-        mock_client = mock.MagicMock()
-        mock_client_factory.return_value = mock_client
-        mock_client.get_shard_iterator.return_value = {'ShardIterator': '1'}
-
-        consumer = BaseConsumer('foo', '1', 'a1b2c3d4', 'qwertyuiop',
-                                'us-east-1', self.checkpointer,
-                                start_type='TRIM_HORIZON')
-        consumer._get_iterator()
-        args, kwargs = mock_client.get_shard_iterator.call_args
-        self.assertEqual(kwargs['ShardIteratorType'], 'TRIM_HORIZON')
-        self.assertNotIn('StartingSequenceNumber', kwargs)
diff --git a/search/agent/tests/tests.py b/search/agent/tests/test_integration.py
similarity index 99%
rename from search/agent/tests/tests.py
rename to search/agent/tests/test_integration.py
index 9ad22496..9fed6ca3 100644
--- a/search/agent/tests/tests.py
+++ b/search/agent/tests/test_integration.py
@@ -10,7 +10,7 @@
 import threading
 
 from search.agent import process_stream
-from search.agent.base import StopProcessing
+from arxiv.base.agent import StopProcessing
 from search.services import metadata
 from search.domain import DocMeta
 from search.factory import create_ui_web_app

From 4dfc7b55203c93608ef1f78997ef4f025fa84667 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Sat, 8 Sep 2018 07:15:08 -0400
Subject: [PATCH 02/77] ARXIVNG-1177 prototype API

---
 api.py                                |  5 ++
 search/controllers/api/__init__.py    | 90 +++++++++++++++++++++++++++
 search/controllers/simple/forms.py    | 19 +-----
 search/domain/__init__.py             |  1 +
 search/domain/api.py                  | 22 +++++++
 search/domain/base.py                 | 18 ++++++
 search/encode.py                      | 23 +++++++
 search/factory.py                     | 24 ++++++-
 search/routes/api.py                  | 29 +++++++++
 search/services/index/__init__.py     | 19 +++---
 search/services/index/highlighting.py |  2 +
 search/services/index/results.py      | 23 +++++--
 12 files changed, 242 insertions(+), 33 deletions(-)
 create mode 100644 api.py
 create mode 100644 search/controllers/api/__init__.py
 create mode 100644 search/domain/api.py
 create mode 100644 search/encode.py
 create mode 100644 search/routes/api.py

diff --git a/api.py b/api.py
new file mode 100644
index 00000000..c51ea39e
--- /dev/null
+++ b/api.py
@@ -0,0 +1,5 @@
+"""Provides application for development purposes."""
+
+from search.factory import create_api_web_app
+
+app = create_api_web_app()
diff --git a/search/controllers/api/__init__.py b/search/controllers/api/__init__.py
new file mode 100644
index 00000000..85fff358
--- /dev/null
+++ b/search/controllers/api/__init__.py
@@ -0,0 +1,90 @@
+"""Controller for search API requests."""
+
+from typing import Tuple, Dict, Any, Optional
+import re
+from datetime import date, datetime
+from dateutil.relativedelta import relativedelta
+import dateutil.parser
+from pytz import timezone
+import pytz
+
+
+from werkzeug.datastructures import MultiDict, ImmutableMultiDict
+from werkzeug.exceptions import InternalServerError, BadRequest, NotFound
+from flask import url_for
+
+from arxiv import status, taxonomy
+
+from search.services import index, fulltext, metadata
+from search.controllers.util import paginate
+from ...domain import Query, APIQuery, FieldedSearchList, FieldedSearchTerm, \
+    DateRange, ClassificationList, Classification, asdict
+
+Response = Tuple[Dict[str, Any], int, Dict[str, Any]]
+EASTERN = timezone('US/Eastern')
+
+
+def _get_fielded_terms(params: MultiDict) -> Optional[FieldedSearchList]:
+    """Build AND-ed terms from supported fields; None if none given."""
+    fielded = FieldedSearchList()
+    for name, _label in Query.SUPPORTED_FIELDS:
+        for text in params.getlist(name):
+            fielded.append(
+                FieldedSearchTerm(operator='AND', field=name, term=text)
+            )
+    if len(fielded) == 0:
+        return None
+    return fielded
+
+
+def _get_date_params(params: MultiDict) -> Optional[DateRange]:
+    """Build a DateRange from the date parameters, or None if none are set."""
+    date_params: Dict[str, Any] = {}
+    for field in ['start_date', 'end_date']:
+        value = params.getlist(field)
+        if not value:
+            continue
+        try:
+            dt = dateutil.parser.parse(value[0])
+            if not dt.tzinfo:   # Naive input is taken to be UTC.
+                dt = pytz.utc.localize(dt)
+            date_params[field] = dt.astimezone(EASTERN)  # replace() gives LMT
+        except (ValueError, OverflowError):
+            raise BadRequest({'field': field, 'reason': 'invalid datetime'})
+    if params.get('date_type'):     # A plain label; must not be date-parsed.
+        date_params['date_type'] = params.get('date_type')
+    return DateRange(**date_params) if date_params else None
+
+
+def _get_classifications(params: MultiDict) -> Optional[ClassificationList]:
+    """Collect primary classifications from ``params``, or None if absent."""
+    classifications = ClassificationList()
+    for archive in params.getlist('primary_classification'):
+        # Anything outside taxonomy.ARCHIVES is rejected as a bad request.
+        if archive not in taxonomy.ARCHIVES:
+            raise BadRequest({
+                'field': 'primary_classification',
+                'reason': 'not a valid archive'
+            })
+        classifications.append(Classification(archive=archive))
+    if len(classifications) == 0:
+        return None
+    return classifications
+
+
+def search(params: MultiDict) -> Response:
+    """Run an API search built from ``params``; respond with status 200."""
+    query = APIQuery()
+    # Each helper yields None when the request has nothing for that part,
+    # in which case the corresponding APIQuery default is left untouched.
+    terms = _get_fielded_terms(params)
+    if terms is not None:
+        query.terms = terms
+    date_range = _get_date_params(params)
+    if date_range is not None:
+        query.date_range = date_range
+    classifications = _get_classifications(params)
+    if classifications is not None:
+        query.primary_classification = classifications
+    query = paginate(query, params)
+    return asdict(index.search(query, highlight=False)), 200, {}
diff --git a/search/controllers/simple/forms.py b/search/controllers/simple/forms.py
index 9e60163a..2d5b6049 100644
--- a/search/controllers/simple/forms.py
+++ b/search/controllers/simple/forms.py
@@ -9,28 +9,13 @@
 
 from search.controllers.util import does_not_start_with_wildcard, \
                                     has_balanced_quotes, strip_white_space
+from ...domain import Query
 
 
 class SimpleSearchForm(Form):
     """Provides a simple field-query search form."""
 
-    searchtype = SelectField("Field", choices=[
-        ('all', 'All fields'),
-        ('title', 'Title'),
-        ('author', 'Author(s)'),
-        ('abstract', 'Abstract'),
-        ('comments', 'Comments'),
-        ('journal_ref', 'Journal reference'),
-        ('acm_class', 'ACM classification'),
-        ('msc_class', 'MSC classification'),
-        ('report_num', 'Report number'),
-        ('paper_id', 'arXiv identifier'),
-        ('doi', 'DOI'),
-        ('orcid', 'ORCID'),
-        ('author_id', 'arXiv author ID'),
-        ('help', 'Help pages'),
-        ('full_text', 'Full text')
-    ])
+    searchtype = SelectField("Field", choices=Query.SUPPORTED_FIELDS)
     query = StringField('Search or Article ID',
                         filters=[strip_white_space],
                         validators=[does_not_start_with_wildcard,
diff --git a/search/domain/__init__.py b/search/domain/__init__.py
index 8e3e3512..9247c384 100644
--- a/search/domain/__init__.py
+++ b/search/domain/__init__.py
@@ -11,3 +11,4 @@
 # pylint: disable=wildcard-import
 from .base import *
 from .advanced import *
+from .api import *
diff --git a/search/domain/api.py b/search/domain/api.py
new file mode 100644
index 00000000..9de39eb1
--- /dev/null
+++ b/search/domain/api.py
@@ -0,0 +1,22 @@
+"""API-specific domain classes."""
+
+from .base import DateRange, Query, ClassificationList
+from .advanced import FieldedSearchList, FieldedSearchTerm
+
+from dataclasses import dataclass, field
+from typing import NamedTuple, Optional
+
+
+@dataclass
+class APIQuery(Query):
+    """
+    Represents an API query.
+
+    Similar to an advanced query.
+    """
+
+    date_range: Optional[DateRange] = None
+    primary_classification: ClassificationList = field(
+        default_factory=ClassificationList
+    )
+    terms: FieldedSearchList = field(default_factory=FieldedSearchList)
diff --git a/search/domain/base.py b/search/domain/base.py
index 8a90a1eb..97edddba 100644
--- a/search/domain/base.py
+++ b/search/domain/base.py
@@ -125,6 +125,24 @@ def __str__(self) -> str:
 class Query:
     """Represents a search query originating from the UI or API."""
 
+    SUPPORTED_FIELDS = [
+        ('all', 'All fields'),
+        ('title', 'Title'),
+        ('author', 'Author(s)'),
+        ('abstract', 'Abstract'),
+        ('comments', 'Comments'),
+        ('journal_ref', 'Journal reference'),
+        ('acm_class', 'ACM classification'),
+        ('msc_class', 'MSC classification'),
+        ('report_num', 'Report number'),
+        ('paper_id', 'arXiv identifier'),
+        ('doi', 'DOI'),
+        ('orcid', 'ORCID'),
+        ('author_id', 'arXiv author ID'),
+        ('help', 'Help pages'),
+        ('full_text', 'Full text')
+    ]
+
     order: Optional[str] = field(default=None)
     page_size: int = field(default=50)
     page_start: int = field(default=0)
diff --git a/search/encode.py b/search/encode.py
new file mode 100644
index 00000000..6a17ee3e
--- /dev/null
+++ b/search/encode.py
@@ -0,0 +1,23 @@
+"""Utilities for response encoding/serialization."""
+
+from datetime import date, datetime
+
+from flask.json import JSONEncoder
+
+from typing import Any, List, Union
+
+
+class ISO8601JSONEncoder(JSONEncoder):
+    """Renders date and datetime objects as ISO8601 datetime strings."""
+
+    def default(self, obj: Any) -> Union[str, List[Any]]:
+        """Overridden to render date(time)s in isoformat."""
+        try:
+            if isinstance(obj, (date, datetime)):
+                return obj.isoformat()
+            iterable = iter(obj)
+        except TypeError:
+            pass
+        else:
+            return list(iterable)
+        return JSONEncoder.default(self, obj) #type: ignore
diff --git a/search/factory.py b/search/factory.py
index db5ebebd..b4bcda9e 100644
--- a/search/factory.py
+++ b/search/factory.py
@@ -7,9 +7,10 @@
 
 from arxiv.base import Base
 from arxiv.base.middleware import wrap, request_logs
-from search.routes import ui
+from search.routes import ui, api
 from search.services import index
 from search.converters import ArchiveConverter
+from search.encode import ISO8601JSONEncoder
 
 s3 = FlaskS3()
 
@@ -34,3 +35,24 @@ def create_ui_web_app() -> Flask:
     wrap(app, [request_logs.ClassicLogsMiddleware])
 
     return app
+
+
+def create_api_web_app() -> Flask:
+    """Initialize an instance of the search API web application."""
+    logging.getLogger('boto').setLevel(logging.ERROR)
+    logging.getLogger('boto3').setLevel(logging.ERROR)
+    logging.getLogger('botocore').setLevel(logging.ERROR)
+
+    app = Flask('search')
+    app.json_encoder = ISO8601JSONEncoder
+    app.config.from_pyfile('config.py')
+
+
+    index.init_app(app)
+
+    Base(app)
+    app.register_blueprint(api.blueprint)
+
+    wrap(app, [request_logs.ClassicLogsMiddleware])
+
+    return app
diff --git a/search/routes/api.py b/search/routes/api.py
new file mode 100644
index 00000000..86e1f7ae
--- /dev/null
+++ b/search/routes/api.py
@@ -0,0 +1,29 @@
+import json
+from typing import Dict, Callable, Union, Any, Optional, List
+from functools import wraps
+from urllib.parse import urljoin, urlparse, parse_qs, urlencode, urlunparse
+
+from flask.json import jsonify
+from flask import Blueprint, render_template, redirect, request, Response, \
+    url_for
+from werkzeug.urls import Href, url_encode, url_parse, url_unparse, url_encode
+from werkzeug.datastructures import MultiDict, ImmutableMultiDict
+
+from arxiv import status
+from arxiv.base import logging
+from werkzeug.exceptions import InternalServerError
+from search.controllers import api
+
+# from arxiv.users.auth.decorators import scoped
+# from arxiv.users.auth import scopes
+
+logger = logging.getLogger(__name__)
+
+blueprint = Blueprint('api', __name__, url_prefix='/')
+
+
+# @scoped(scopes.READ_API)
+@blueprint.route('/', methods=['GET'])
+def search() -> Response:
+    data, status, headers = api.search(request.args)
+    return jsonify(data), status, headers
diff --git a/search/services/index/__init__.py b/search/services/index/__init__.py
index 58f00672..cee1ea40 100644
--- a/search/services/index/__init__.py
+++ b/search/services/index/__init__.py
@@ -33,7 +33,7 @@
 from search.context import get_application_config, get_application_global
 from arxiv.base import logging
 from search.domain import Document, DocumentSet, Query, AdvancedQuery, \
-    SimpleQuery, asdict
+    SimpleQuery, asdict, APIQuery
 
 from .exceptions import QueryError, IndexConnectionError, DocumentNotFound, \
     IndexingError, OutsideAllowedRange, MappingError
@@ -358,7 +358,7 @@ def get_document(self, document_id: int) -> Document:
         return Document(**record['_source'])    # type: ignore
         # See https://github.com/python/mypy/issues/3937
 
-    def search(self, query: Query) -> DocumentSet:
+    def search(self, query: Query, highlight: bool = True) -> DocumentSet:
         """
         Perform a search.
 
@@ -389,7 +389,7 @@ def search(self, query: Query) -> DocumentSet:
         logger.debug('got current search request %s', str(query))
         current_search = self._base_search()
         try:
-            if isinstance(query, AdvancedQuery):
+            if isinstance(query, AdvancedQuery) or isinstance(query, APIQuery):
                 current_search = advanced_search(current_search, query)
             elif isinstance(query, SimpleQuery):
                 current_search = simple_search(current_search, query)
@@ -397,16 +397,17 @@ def search(self, query: Query) -> DocumentSet:
             logger.error('Malformed query: %s', str(e))
             raise QueryError('Malformed query') from e
 
-        # Highlighting is performed by Elasticsearch; here we include the
-        # fields and configuration for highlighting.
-        current_search = highlight(current_search)
+        if highlight:
+            # Highlighting is performed by Elasticsearch; here we include the
+            # fields and configuration for highlighting.
+            current_search = highlight(current_search)
 
         with handle_es_exceptions():
             # Slicing the search adds pagination parameters to the request.
             resp = current_search[query.page_start:query.page_end].execute()
 
         # Perform post-processing on the search results.
-        return results.to_documentset(query, resp)
+        return results.to_documentset(query, resp, highlight=highlight)
 
     def exists(self, paper_id_v: str) -> bool:
         """Determine whether a paper exists in the index."""
@@ -456,9 +457,9 @@ def current_session() -> SearchSession:
 
 
 @wraps(SearchSession.search)
-def search(query: Query) -> DocumentSet:
+def search(query: Query, highlight: bool = True) -> DocumentSet:
     """Retrieve search results."""
-    return current_session().search(query)
+    return current_session().search(query, highlight=highlight)
 
 
 @wraps(SearchSession.add_document)
diff --git a/search/services/index/highlighting.py b/search/services/index/highlighting.py
index c9f4a504..82ec7c4b 100644
--- a/search/services/index/highlighting.py
+++ b/search/services/index/highlighting.py
@@ -174,6 +174,8 @@ def add_highlighting(result: dict, raw: Response) -> dict:
     # them together. Note that dir(None) won't return anything, so this block
     # is skipped if there are no highlights from ES.
     for field in dir(highlighted_fields):
+        if field.startswith('_'):
+            continue
         value = getattr(highlighted_fields, field)
         if hasattr(value, '__iter__'):
             value = '&hellip;'.join(value)
diff --git a/search/services/index/results.py b/search/services/index/results.py
index 27174c75..88f89db5 100644
--- a/search/services/index/results.py
+++ b/search/services/index/results.py
@@ -10,6 +10,7 @@
 from typing import Any, Dict
 
 from elasticsearch_dsl.response import Response
+from elasticsearch_dsl.utils import AttrList, AttrDict
 from search.domain import Document, Query, DocumentSet
 from arxiv.base import logging
 
@@ -20,7 +21,7 @@
 logger.propagate = False
 
 
-def _to_document(raw: Response) -> Document:
+def _to_document(raw: Response, highlight: bool = True) -> Document:
     """Transform an ES search result back into a :class:`.Document`."""
     # typing: ignore
     result: Dict[str, Any] = {}
@@ -31,6 +32,14 @@ def _to_document(raw: Response) -> Document:
         if not hasattr(raw, key):
             continue
         value = getattr(raw, key)
+
+        # We want to prevent ES-specific data types from escaping the module
+        # API.
+        if isinstance(value, AttrList):
+            value = value._l_
+        elif isinstance(value, AttrDict):
+            value = value.to_dict()
+
         if key == 'announced_date_first' and value and isinstance(value, str):
             value = datetime.strptime(value, '%Y-%m').date()
         if key in ['submitted_date', 'submitted_date_first',
@@ -47,18 +56,20 @@ def _to_document(raw: Response) -> Document:
 
         result[key] = value
     result['score'] = raw.meta.score
-    if type(result['abstract']) is str:
+    if type(result['abstract']) is str and highlight:
         result['preview']['abstract'] = preview(result['abstract'])
         if result['preview']['abstract'].endswith('&hellip;'):
             result['truncated']['abstract'] = True
 
-    logger.debug('%s: add highlighting to result', raw.paper_id)
-    result = add_highlighting(result, raw)
+    if highlight:
+        logger.debug('%s: add highlighting to result', raw.paper_id)
+        result = add_highlighting(result, raw)
     return Document(**result)   # type: ignore
     # See https://github.com/python/mypy/issues/3937
 
 
-def to_documentset(query: Query, response: Response) -> DocumentSet:
+def to_documentset(query: Query, response: Response, highlight: bool = True) \
+        -> DocumentSet:
     """
     Transform a response from ES to a :class:`.DocumentSet`.
 
@@ -93,6 +104,6 @@ def to_documentset(query: Query, response: Response) -> DocumentSet:
             'page_size': query.page_size,
             'max_pages': max_pages
         },
-        'results': [_to_document(raw) for raw in response]
+        'results': [_to_document(raw, highlight=highlight) for raw in response]
     })
     # See https://github.com/python/mypy/issues/3937

From 65c471159f1f3bd615f6dadc118a4c9439e0a0d2 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Sat, 8 Sep 2018 07:21:30 -0400
Subject: [PATCH 03/77] set max page size

---
 search/controllers/api/__init__.py | 52 +++++++++++++++++++-----------
 search/controllers/util.py         |  4 +--
 search/domain/base.py              |  3 ++
 search/services/index/__init__.py  |  4 +--
 4 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/search/controllers/api/__init__.py b/search/controllers/api/__init__.py
index 85fff358..851032f7 100644
--- a/search/controllers/api/__init__.py
+++ b/search/controllers/api/__init__.py
@@ -24,6 +24,40 @@
 EASTERN = timezone('US/Eastern')
 
 
+def search(params: MultiDict) -> Response:
+    """
+    Handle a search request from the API.
+
+    Parameters
+    ----------
+    params : :class:`MultiDict`
+        GET query parameters from the request.
+
+    Returns
+    -------
+    dict
+        Response data (to serialize).
+    int
+        HTTP status code.
+    dict
+        Extra headers for the response.
+    """
+    q = APIQuery()
+    terms = _get_fielded_terms(params)
+    if terms is not None:
+        q.terms = terms
+    date_range = _get_date_params(params)
+    if date_range is not None:
+        q.date_range = date_range
+
+    classifications = _get_classifications(params)
+    if classifications is not None:
+        q.primary_classification = classifications
+
+    q = paginate(q, params)
+    return asdict(index.search(q, highlight=False)), status.HTTP_200_OK, {}
+
+
 def _get_fielded_terms(params: MultiDict) -> Optional[FieldedSearchList]:
     terms = FieldedSearchList()
     for field, _ in Query.SUPPORTED_FIELDS:
@@ -70,21 +104,3 @@ def _get_classifications(params: MultiDict) -> Optional[ClassificationList]:
     if len(classifications) == 0:
         return
     return classifications
-
-
-def search(params: MultiDict) -> Response:
-    """Handle a search request from the API."""
-    q = APIQuery()
-    terms = _get_fielded_terms(params)
-    if terms is not None:
-        q.terms = terms
-    date_range = _get_date_params(params)
-    if date_range is not None:
-        q.date_range = date_range
-
-    classifications = _get_classifications(params)
-    if classifications is not None:
-        q.primary_classification = classifications
-
-    q = paginate(q, params)
-    return asdict(index.search(q, highlight=False)), 200, {}
diff --git a/search/controllers/util.py b/search/controllers/util.py
index 9110ad3f..0444fc6f 100644
--- a/search/controllers/util.py
+++ b/search/controllers/util.py
@@ -52,8 +52,8 @@ def paginate(query: Query, data: dict) -> Query:
     :class:`.Query`
 
     """
-    query.page_start = int(data.get('start', 0))
-    query.page_size = int(data.get('size', 50))
+    query.page_start = max(int(data.get('start', 0)), 0)
+    query.page_size = min(int(data.get('size', 50)), Query.MAXIMUM_PAGE_SIZE)
     return query
 
 
diff --git a/search/domain/base.py b/search/domain/base.py
index 97edddba..6c7cf658 100644
--- a/search/domain/base.py
+++ b/search/domain/base.py
@@ -125,6 +125,9 @@ def __str__(self) -> str:
 class Query:
     """Represents a search query originating from the UI or API."""
 
+    MAXIMUM_PAGE_SIZE = 500
+    """The maximum number of records that can be retrieved."""
+
     SUPPORTED_FIELDS = [
         ('all', 'All fields'),
         ('title', 'Title'),
diff --git a/search/services/index/__init__.py b/search/services/index/__init__.py
index cee1ea40..262d89ac 100644
--- a/search/services/index/__init__.py
+++ b/search/services/index/__init__.py
@@ -40,7 +40,7 @@
 from .util import MAX_RESULTS
 from .advanced import advanced_search
 from .simple import simple_search
-from .highlighting import highlight
+from . import highlighting
 from . import results
 
 logger = logging.getLogger(__name__)
@@ -400,7 +400,7 @@ def search(self, query: Query, highlight: bool = True) -> DocumentSet:
         if highlight:
             # Highlighting is performed by Elasticsearch; here we include the
             # fields and configuration for highlighting.
-            current_search = highlight(current_search)
+            current_search = highlighting.highlight(current_search)
 
         with handle_es_exceptions():
             # Slicing the search adds pagination parameters to the request.

From 7e079d64eeb4209b3baed00d4a61b82651691cbc Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Sat, 8 Sep 2018 07:55:29 -0400
Subject: [PATCH 04/77]  ARXIVNG-1177 updating openapi and jsonschema

---
 requirements/dev.txt                         |  45 ---
 requirements/prod.txt                        |  45 ---
 requirements/test.txt                        |   6 -
 schema/DocumentSet.json                      |  41 ---
 schema/{ => resources}/Document.json         |  36 +--
 schema/resources/DocumentMetadata.json       | 301 +++++++++++++++++++
 schema/resources/DocumentSet.json            |  42 +++
 {api => schema}/search.yaml                  |  19 +-
 search/controllers/util.py                   |   2 +-
 search/domain/base.py                        |   8 +-
 search/routes/ui.py                          |   4 +-
 search/services/index/__init__.py            |   8 +-
 search/services/index/results.py             |  24 +-
 search/services/index/tests/tests.py         |   8 +-
 search/templates/search/advanced_search.html |   4 +-
 search/templates/search/search-macros.html   |  14 +-
 search/templates/search/search.html          |   2 +-
 tests/integration/test_search_integration.py |  18 +-
 18 files changed, 427 insertions(+), 200 deletions(-)
 delete mode 100644 requirements/dev.txt
 delete mode 100644 requirements/prod.txt
 delete mode 100644 requirements/test.txt
 delete mode 100644 schema/DocumentSet.json
 rename schema/{ => resources}/Document.json (90%)
 create mode 100644 schema/resources/DocumentMetadata.json
 create mode 100644 schema/resources/DocumentSet.json
 rename {api => schema}/search.yaml (80%)

diff --git a/requirements/dev.txt b/requirements/dev.txt
deleted file mode 100644
index 9a97fe49..00000000
--- a/requirements/dev.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-amazon-kclpy==1.4.4
-arxiv-base==0.6.1
-boto==2.48.0
-boto3==1.6.6
-botocore==1.9.6
-certifi==2017.7.27.1
-chardet==3.0.4
-click==6.7
-coverage==4.4.2
-dataclasses==0.4
-docutils==0.14
-elasticsearch==6.1.1
-elasticsearch-dsl==6.1.0
-Flask==0.12.2
-Flask-S3==0.3.3
-idna==2.6
-ipaddress==1.0.19
-itsdangerous==0.24
-Jinja2==2.10
-jmespath==0.9.3
-jsonschema==2.6.0
-MarkupSafe==1.0
-mccabe==0.6.1
-mock==2.0.0
-mypy==0.560
-nose2==0.7.3
-pbr==3.1.1
-psutil==5.4.3
-pycodestyle==2.3.1
-pydocstyle==2.1.1
-pyflakes==1.6.0
-pylama==7.4.3
-python-dateutil==2.6.1
-pytz==2017.3
-requests==2.18.4
-s3transfer==0.1.13
-six==1.11.0
-snowballstemmer==1.2.1
-thrift==0.11.0
-thrift-connector==0.23
-typed-ast==1.1.0
-urllib3==1.22
-Werkzeug==0.13
-WTForms==2.1
-bleach==2.0.0
diff --git a/requirements/prod.txt b/requirements/prod.txt
deleted file mode 100644
index 9a97fe49..00000000
--- a/requirements/prod.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-amazon-kclpy==1.4.4
-arxiv-base==0.6.1
-boto==2.48.0
-boto3==1.6.6
-botocore==1.9.6
-certifi==2017.7.27.1
-chardet==3.0.4
-click==6.7
-coverage==4.4.2
-dataclasses==0.4
-docutils==0.14
-elasticsearch==6.1.1
-elasticsearch-dsl==6.1.0
-Flask==0.12.2
-Flask-S3==0.3.3
-idna==2.6
-ipaddress==1.0.19
-itsdangerous==0.24
-Jinja2==2.10
-jmespath==0.9.3
-jsonschema==2.6.0
-MarkupSafe==1.0
-mccabe==0.6.1
-mock==2.0.0
-mypy==0.560
-nose2==0.7.3
-pbr==3.1.1
-psutil==5.4.3
-pycodestyle==2.3.1
-pydocstyle==2.1.1
-pyflakes==1.6.0
-pylama==7.4.3
-python-dateutil==2.6.1
-pytz==2017.3
-requests==2.18.4
-s3transfer==0.1.13
-six==1.11.0
-snowballstemmer==1.2.1
-thrift==0.11.0
-thrift-connector==0.23
-typed-ast==1.1.0
-urllib3==1.22
-Werkzeug==0.13
-WTForms==2.1
-bleach==2.0.0
diff --git a/requirements/test.txt b/requirements/test.txt
deleted file mode 100644
index 49bc87f5..00000000
--- a/requirements/test.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-coverage==4.5
-mypy==0.570
-nose2==0.7.3
-pylint==1.8.2
-coveralls==1.3.0
-pydocstyle==2.1.1
diff --git a/schema/DocumentSet.json b/schema/DocumentSet.json
deleted file mode 100644
index 49bd8354..00000000
--- a/schema/DocumentSet.json
+++ /dev/null
@@ -1,41 +0,0 @@
-{
-    "$schema": "http://json-schema.org/schema#",
-    "title": "DocumentSet",
-    "description": "Prototype schema for arXiv documents",
-    "type": "object",
-    "required": ["metadata", "results"],
-    "properties": {
-        "metadata": {
-            "type": "object",
-            "query": {
-                "description": "The request query from which the document set is derived",
-                "type": "string"
-            },
-            "total": {
-                "description": "Total number of results that respond to the query.",
-                "type": "integer",
-                "minimum": 0
-            },
-            "pagination": {
-                "description": "Pagination details",
-                "type": "object",
-                "properties": {
-                    "next": {
-                        "type": "string",
-                        "format": "uri",
-                        "description": "URI for the next page of results"
-                    },
-                    "previous": {
-                        "type": "string",
-                        "format": "uri",
-                        "description": "URI for the previous page of results"
-                    }
-                }
-            }
-        },
-        "results": {
-            "type": "object",
-            "$ref": "Document.json#Document"
-        }
-    }
-}
diff --git a/schema/Document.json b/schema/resources/Document.json
similarity index 90%
rename from schema/Document.json
rename to schema/resources/Document.json
index 74166186..d2327220 100644
--- a/schema/Document.json
+++ b/schema/resources/Document.json
@@ -1,7 +1,7 @@
 {
     "$schema": "http://json-schema.org/schema#",
     "title": "Document",
-    "description": "Prototype schema for arXiv documents",
+    "description": "Schema for arXiv document metadata returned by the search API.",
     "definitions": {
         "category": {
             "type": "object",
@@ -147,21 +147,21 @@
             },
             "minItems": 0
         },
-        "announced_first": {
+        "announced_date_first": {
           "desription": "Date (year and month) the paper was originally announced",
           "type": "string"
-        }
-        "updated_date": {
-            "description": "Date this version of paper was last touched",
-            "type": "string"
         },
         "modified_date": {
             "description": "Date this version of paper .abs file was last modified",
             "type": "string"
         },
-        "document_id": {
-            "description": "Internal, version-independent document identifier",
-            "type": "integer"
+        "paper_id": {
+            "description": "Version-independent arXiv paper identifier.",
+            "type": "string"
+        },
+        "paper_id_v": {
+            "description": "arXiv paper identifier with version affix.",
+            "type": "string"
         },
         "doi": {
             "type": "string"
@@ -209,13 +209,6 @@
             },
             "uniqueItems": true
         },
-        "paper_id": {
-            "type": "string"
-        },
-        "paper_id_v": {
-            "description": "Paper ID with version suffix",
-            "type": "string"
-        },
         "primary_classification": {
             "$ref": "#/definitions/classification"
         },
@@ -224,7 +217,7 @@
             "items": {"$ref": "#/definitions/classification"}
         },
         "proxy": {
-            "type": "string"
+            "type": "boolean"
         },
         "report_num": {
             "type": "string"
@@ -245,7 +238,7 @@
             "required": ["size_bytes"]
         },
         "submitter": {
-            "description": "Submitter data. Name and email may not match those associated with user account, since user data is copied to submission data at the time of submission creation.",
+            "description": "Submitter data. Name may not match those associated with user account, since user data is copied to submission data at the time of submission creation.",
             "type": "object",
             "properties": {
                 "name": {
@@ -261,24 +254,27 @@
                     "type": "string"
                 }
             },
-            "required": ["email", "name"]
+            "required": ["name"]
         },
         "title": {
             "type": "string"
         },
         "title_utf8": {
-            "description": "title field with texisms converted to utf-8 equivalents",
+            "description": "Title field with texisms converted to utf-8 equivalents",
             "type": "string"
         },
         "version": {
+            "description": "The version number for this paper.",
             "minimum": 1,
             "type": "integer"
         },
         "latest_version": {
+            "description": "Number of the latest version of this paper.",
             "minimum": 1,
             "type": "integer"
         },
         "latest": {
+            "description": "arXiv paper identifier (with version affix) of latest version of this paper.",
             "type": "string"
         }
     },
diff --git a/schema/resources/DocumentMetadata.json b/schema/resources/DocumentMetadata.json
new file mode 100644
index 00000000..84b791f5
--- /dev/null
+++ b/schema/resources/DocumentMetadata.json
@@ -0,0 +1,301 @@
+{
+    "$schema": "http://json-schema.org/schema#",
+    "title": "DocumentMetadata",
+    "description": "Schema for arXiv document metadata provided by the docmeta endpoint.",
+    "definitions": {
+        "category": {
+            "type": "object",
+            "properties": {
+                "id": {
+                    "type": "string"
+                },
+                "name": {
+                    "type": "string"
+                }
+            },
+            "required": ["id", "name"]
+        },
+        "archive": {
+            "type": "object",
+            "properties": {
+                "id": {
+                    "type": "string"
+                },
+                "name": {
+                    "type": "string"
+                }
+            },
+            "required": ["id", "name"]
+        },
+        "group": {
+            "type": "object",
+            "properties": {
+                "id": {
+                    "type": "string"
+                },
+                "name": {
+                    "type": "string"
+                }
+            },
+            "required": ["id", "name"]
+        },
+        "classification": {
+            "type": "object",
+            "properties": {
+                "archive": {"$ref": "#/definitions/archive"},
+                "group": {"$ref": "#/definitions/group"},
+                "category": {"$ref": "#/definitions/category"}
+            }
+        }
+    },
+    "type": "object",
+    "properties": {
+        "abs_categories": {
+            "description": "Categories as they would appear on the /abs page",
+            "type": "string"
+        },
+        "abstract": {
+            "type": "string"
+        },
+        "abstract_utf8": {
+            "description": "abstract field with texisms converted to utf-8 equivalents",
+            "type": "string"
+        },
+        "acm_class": {
+            "description": "Classifications from ACM Computing Classification System",
+            "type": "array",
+            "items": {
+                "type": "string"
+            },
+            "uniqueItems": true
+        },
+        "owners": {
+            "items": {
+                "type": "object",
+                "properties": {
+                    "first_name": {
+                        "type": "string"
+                    },
+                    "last_name": {
+                        "type": "string"
+                    },
+                    "suffix": {
+                        "type": "string"
+                    },
+                    "affiliation": {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        },
+                        "minItems": 0
+                    },
+                    "orcid": {
+                        "type": "string"
+                    }
+                },
+                "required": ["last_name"]
+            }
+        },
+        "authors": {
+            "items": {
+                "type": "object",
+                "properties": {
+                    "first_name": {
+                        "type": "string"
+                    },
+                    "last_name": {
+                        "type": "string"
+                    },
+                    "suffix": {
+                        "type": "string"
+                    },
+                    "author_id": {
+                        "type": "string"
+                    },
+                    "orcid": {
+                        "type": "string"
+                    },
+                    "affiliation": {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        },
+                        "minItems": 0
+                    }
+                },
+                "required": ["last_name"]
+            },
+            "minItems": 1,
+            "type": "array"
+        },
+        "comments": {
+            "type": ["string"]
+        },
+        "comments_utf8": {
+            "description": "comments field with texisms converted to utf-8 equivalents",
+            "type": ["string"]
+        },
+        "submitted_date": {
+            "description": "Date this version of paper was submitted",
+            "type": "string"
+        },
+        "submitted_date_all": {
+            "description": "Submitted dates of all of the versions of this paper",
+            "type": "array",
+            "items": {
+                "type": "string"
+            },
+            "minItems": 0
+        },
+        "announced_first": {
+          "description": "Date (year and month) the paper was originally announced",
+          "type": "string"
+        },
+        "updated_date": {
+            "description": "Date this version of paper was last touched",
+            "type": "string"
+        },
+        "modified_date": {
+            "description": "Date this version of paper .abs file was last modified",
+            "type": "string"
+        },
+        "document_id": {
+            "description": "Internal, version-independent document identifier",
+            "type": "integer"
+        },
+        "doi": {
+            "type": "string"
+        },
+        "formats": {
+            "description": "Derivative paper formats available to users",
+            "type": "array",
+            "minItems": 0
+        },
+        "fulltext": {
+            "type": "string"
+        },
+        "is_current": {
+            "type": "boolean"
+        },
+        "is_withdrawn": {
+            "type": "boolean"
+        },
+        "journal_ref": {
+            "type": ["string"]
+        },
+        "journal_ref_utf8": {
+            "description": "journal_ref field with texisms converted to utf-8 equivalents",
+            "type": ["string"]
+        },
+        "license": {
+            "type": "object",
+            "properties": {
+                "uri": {
+                    "type": ["string", "null"]
+                },
+                "label": {
+                    "type": ["string", "null"]
+                }
+            }
+        },
+        "metadata_id": {
+            "type": "integer"
+        },
+        "msc_class": {
+            "description": "Classifications from American Mathematical Society Mathematical Subject Classification (MSC)",
+            "type": "array",
+            "items": {
+                "type": "string"
+            },
+            "uniqueItems": true
+        },
+        "paper_id": {
+            "type": "string"
+        },
+        "paper_id_v": {
+            "description": "Paper ID with version suffix",
+            "type": "string"
+        },
+        "primary_classification": {
+            "$ref": "#/definitions/classification"
+        },
+        "secondary_classification": {
+            "type": "array",
+            "items": {"$ref": "#/definitions/classification"}
+        },
+        "proxy": {
+            "type": "string"
+        },
+        "report_num": {
+            "type": "string"
+        },
+        "source": {
+            "properties": {
+                "flags": {
+                    "type": ["string", "null"]
+                },
+                "format": {
+                    "type": ["string", "null"]
+                },
+                "size_bytes": {
+                    "minimum": 0,
+                    "type": "integer"
+                }
+            },
+            "required": ["size_bytes"]
+        },
+        "submitter": {
+            "description": "Submitter data. Name and email may not match those associated with user account, since user data is copied to submission data at the time of submission creation.",
+            "type": "object",
+            "properties": {
+                "name": {
+                    "type": "string"
+                },
+                "is_author": {
+                    "type": "boolean"
+                },
+                "author_id": {
+                    "type": "string"
+                },
+                "orcid": {
+                    "type": "string"
+                }
+            },
+            "required": ["email", "name"]
+        },
+        "title": {
+            "type": "string"
+        },
+        "title_utf8": {
+            "description": "title field with texisms converted to utf-8 equivalents",
+            "type": "string"
+        },
+        "version": {
+            "minimum": 1,
+            "type": "integer"
+        },
+        "latest_version": {
+            "minimum": 1,
+            "type": "integer"
+        },
+        "latest": {
+            "type": "string"
+        }
+    },
+    "required": [
+        "abstract",
+        "authors",
+        "submitted_date",
+        "submitted_date_latest",
+        "submitted_date_first",
+        "is_current",
+        "is_withdrawn",
+        "license",
+        "paper_id",
+        "paper_id_v",
+        "primary_classification",
+        "title",
+        "source",
+        "version"
+    ]
+}
diff --git a/schema/resources/DocumentSet.json b/schema/resources/DocumentSet.json
new file mode 100644
index 00000000..7cb584a8
--- /dev/null
+++ b/schema/resources/DocumentSet.json
@@ -0,0 +1,42 @@
+{
+  "$schema": "http://json-schema.org/schema#",
+  "title": "DocumentSet",
+  "description": "A set of documents that respond to a query.",
+  "type": "object",
+  "required": ["metadata", "results"],
+  "properties": {
+    "metadata": {
+      "description": "Summary information about the search, including pagination.",
+      "properties": {
+        "start": {
+          "description": "Offset (zero-based) of first result in this documentset from start of original search results.",
+          "type": "integer"
+        },
+        "end": {
+          "description": "Offset (zero-based) of last result in this documentset from start of original search results.",
+          "type": "integer"
+        },
+        "total": {
+          "description": "Total number of documents that respond to this query.",
+          "type": "integer"
+        },
+        "current_page": {
+          "description": "Estimated page number, based on start and size parameters.",
+          "type": "integer"
+        },
+        "max_pages": {
+          "description": "Estimated last page in the entire search results, based on size and total parameters.",
+          "type": "integer"
+        },
+        "total_pages": {
+          "description": "Estimated number of pages in the entire search results, based on size and total parameters.",
+          "type": "integer"
+        }
+      }
+    },
+    "results": {
+      "type": "object",
+      "$ref": "Document.json#Document"
+    }
+  }
+}
diff --git a/api/search.yaml b/schema/search.yaml
similarity index 80%
rename from api/search.yaml
rename to schema/search.yaml
index b2e75ade..d676caf3 100644
--- a/api/search.yaml
+++ b/schema/search.yaml
@@ -12,13 +12,23 @@ info:
 servers:
   - url: https://arxiv.org/api
 paths:
-  /papers:
+  /:
     get:
       operationId: queryPapers
       description: |
         Returns all published arXiv papers that respond to the specified
         query parameters. By default, returns most recent papers first.
       parameters:
+        - name: all
+          in: query
+          description: |
+            Performs a query across all fields. Has the same behavior as the
+            simple search function on the main arXiv website.
+          required: false
+          style: form
+          explode: true
+          schema:
+            type: string
         - name: primary_category
           in: query
           description: |
@@ -26,6 +36,7 @@ paths:
             should be limited.
           required: false
           style: form
+          explode: true
           schema:
             type: array
             items:
@@ -36,14 +47,14 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '../schema/DocumentSet.json#DocumentSet'
+                $ref: './resources/DocumentSet.json#DocumentSet'
         default:
           description: unexpected error
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/Error'
-  /papers/{id}:
+  /{id}:
     get:
       description: Return metadata about an arXiv paper by arXiv ID.
       operationId: getPaperByID
@@ -60,7 +71,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '../schema/Document.json#Document'
+                $ref: './resources/Document.json#Document'
         default:
           description: unexpected error
           content:
diff --git a/search/controllers/util.py b/search/controllers/util.py
index 0444fc6f..a506f090 100644
--- a/search/controllers/util.py
+++ b/search/controllers/util.py
@@ -53,7 +53,7 @@ def paginate(query: Query, data: dict) -> Query:
 
     """
     query.page_start = max(int(data.get('start', 0)), 0)
-    query.page_size = min(int(data.get('size', 50)), Query.MAXIMUM_PAGE_SIZE)
+    query.size = min(int(data.get('size', 50)), Query.MAXIMUM_size)
     return query
 
 
diff --git a/search/domain/base.py b/search/domain/base.py
index 6c7cf658..91dc0596 100644
--- a/search/domain/base.py
+++ b/search/domain/base.py
@@ -125,7 +125,7 @@ def __str__(self) -> str:
 class Query:
     """Represents a search query originating from the UI or API."""
 
-    MAXIMUM_PAGE_SIZE = 500
+    MAXIMUM_size = 500
     """The maximum number of records that can be retrieved."""
 
     SUPPORTED_FIELDS = [
@@ -147,7 +147,7 @@ class Query:
     ]
 
     order: Optional[str] = field(default=None)
-    page_size: int = field(default=50)
+    size: int = field(default=50)
     page_start: int = field(default=0)
     include_older_versions: bool = field(default=False)
     hide_abstracts: bool = field(default=False)
@@ -155,12 +155,12 @@ class Query:
     @property
     def page_end(self) -> int:
         """Get the index/offset of the end of the page."""
-        return self.page_start + self.page_size
+        return self.page_start + self.size
 
     @property
     def page(self) -> int:
         """Get the approximate page number."""
-        return 1 + int(round(self.page_start/self.page_size))
+        return 1 + int(round(self.page_start/self.size))
 
     def __str__(self) -> str:
         """Build a string representation, for use in rendering."""
diff --git a/search/routes/ui.py b/search/routes/ui.py
index b0ac8be5..efe8341d 100644
--- a/search/routes/ui.py
+++ b/search/routes/ui.py
@@ -151,12 +151,12 @@ def external_url(service: str, name: str, **parameters: Any) \
 @blueprint.context_processor
 def url_for_page_builder() -> Dict[str, Callable]:
     """Add a page URL builder function to the template context."""
-    def url_for_page(page: int, page_size: int) -> str:
+    def url_for_page(page: int, size: int) -> str:
         """Build an URL to for a search result page."""
         rule = request.url_rule
         parts = url_parse(url_for(rule.endpoint))
         args = request.args.copy()
-        args['start'] = (page - 1) * page_size
+        args['start'] = (page - 1) * size
         parts = parts.replace(query=url_encode(args))
         url: str = url_unparse(parts)
         return url
diff --git a/search/services/index/__init__.py b/search/services/index/__init__.py
index 262d89ac..84d9f0d5 100644
--- a/search/services/index/__init__.py
+++ b/search/services/index/__init__.py
@@ -270,7 +270,8 @@ def add_document(self, document: Document) -> None:
         Parameters
         ----------
         document : :class:`.Document`
-            Must be a valid search document, per ``schema/Document.json``.
+            Must be a valid search document, per
+            ``schema/resources/DocumentMetadata.json``.
 
         Raises
         ------
@@ -297,7 +298,8 @@ def bulk_add_documents(self, documents: List[Document],
         Parameters
         ----------
         document : :class:`.Document`
-            Must be a valid search document, per ``schema/Document.json``.
+            Must be a valid search document, per
+            ``schema/resources/DocumentMetadata.json``.
         docs_per_chunk: int
             Number of documents to send to ES in a single chunk
         Raises
@@ -379,7 +381,7 @@ def search(self, query: Query, highlight: bool = True) -> DocumentSet:
 
         """
         # Make sure that the user is not requesting a nonexistant page.
-        max_pages = int(MAX_RESULTS/query.page_size)
+        max_pages = int(MAX_RESULTS/query.size)
         if query.page > max_pages:
             _message = f'Requested page {query.page}, but max is {max_pages}'
             logger.error(_message)
diff --git a/search/services/index/results.py b/search/services/index/results.py
index 88f89db5..fe188564 100644
--- a/search/services/index/results.py
+++ b/search/services/index/results.py
@@ -21,11 +21,16 @@
 logger.propagate = False
 
 
+def _to_author(author_data: dict) -> dict:
+    """Copy author data without ``email``, so addresses are not exposed."""
+    return {k: v for k, v in author_data.items() if k != 'email'}
+
+
 def _to_document(raw: Response, highlight: bool = True) -> Document:
     """Transform an ES search result back into a :class:`.Document`."""
     # typing: ignore
     result: Dict[str, Any] = {}
-    result['highlight'] = {}
+
     result['match'] = {}  # Hit on field, but no highlighting.
     result['truncated'] = {}    # Preview is truncated.
     for key in Document.fields():
@@ -40,6 +45,11 @@ def _to_document(raw: Response, highlight: bool = True) -> Document:
         elif isinstance(value, AttrDict):
             value = value.to_dict()
 
+        if key in ['authors', 'owners']:
+            value = [_to_author(au) for au in value]
+        elif key == 'submitter':
+            value = _to_author(value)
+
         if key == 'announced_date_first' and value and isinstance(value, str):
             value = datetime.strptime(value, '%Y-%m').date()
         if key in ['submitted_date', 'submitted_date_first',
@@ -62,8 +72,10 @@ def _to_document(raw: Response, highlight: bool = True) -> Document:
             result['truncated']['abstract'] = True
 
     if highlight:
+        result['highlight'] = {}
         logger.debug('%s: add highlighting to result', raw.paper_id)
         result = add_highlighting(result, raw)
+    
     return Document(**result)   # type: ignore
     # See https://github.com/python/mypy/issues/3937
 
@@ -87,21 +99,21 @@ def to_documentset(query: Query, response: Response, highlight: bool = True) \
         page, along with pagination metadata.
 
     """
-    max_pages = int(MAX_RESULTS/query.page_size)
-    N_pages_raw = response['hits']['total']/query.page_size
+    max_pages = int(MAX_RESULTS/query.size)
+    N_pages_raw = response['hits']['total']/query.size
     N_pages = int(floor(N_pages_raw)) + \
-        int(N_pages_raw % query.page_size > 0)
+        int(N_pages_raw % query.size > 0)
     logger.debug('got %i results', response['hits']['total'])
 
     return DocumentSet(**{  # type: ignore
         'metadata': {
             'start': query.page_start,
-            'end': min(query.page_start + query.page_size,
+            'end': min(query.page_start + query.size,
                        response['hits']['total']),
             'total': response['hits']['total'],
             'current_page': query.page,
             'total_pages': N_pages,
-            'page_size': query.page_size,
+            'size': query.size,
             'max_pages': max_pages
         },
         'results': [_to_document(raw, highlight=highlight) for raw in response]
diff --git a/search/services/index/tests/tests.py b/search/services/index/tests/tests.py
index 34d5e3ec..e671c1fa 100644
--- a/search/services/index/tests/tests.py
+++ b/search/services/index/tests/tests.py
@@ -41,7 +41,7 @@ def test_advanced_query(self, mock_Elasticsearch, mock_Search):
 
         query = AdvancedQuery(
             order='relevance',
-            page_size=10,
+            size=10,
             date_range=DateRange(
                 start_date=datetime.now() - timedelta(days=5),
                 end_date=datetime.now()
@@ -80,7 +80,7 @@ def test_advanced_query(self, mock_Elasticsearch, mock_Search):
         self.assertEqual(document_set.metadata['total'], 53)
         self.assertEqual(document_set.metadata['current_page'], 1)
         self.assertEqual(document_set.metadata['total_pages'], 6)
-        self.assertEqual(document_set.metadata['page_size'], 10)
+        self.assertEqual(document_set.metadata['size'], 10)
         self.assertEqual(len(document_set.results), 1)
 
     @mock.patch('search.services.index.Search')
@@ -105,7 +105,7 @@ def test_simple_query(self, mock_Elasticsearch, mock_Search):
 
         query = SimpleQuery(
             order='relevance',
-            page_size=10,
+            size=10,
             search_field='title',
             value='foo title'
         )
@@ -115,7 +115,7 @@ def test_simple_query(self, mock_Elasticsearch, mock_Search):
         self.assertEqual(document_set.metadata['total'], 53)
         self.assertEqual(document_set.metadata['current_page'], 1)
         self.assertEqual(document_set.metadata['total_pages'], 6)
-        self.assertEqual(document_set.metadata['page_size'], 10)
+        self.assertEqual(document_set.metadata['size'], 10)
         self.assertEqual(len(document_set.results), 1)
 
 
diff --git a/search/templates/search/advanced_search.html b/search/templates/search/advanced_search.html
index 7a2efb42..092abd9f 100644
--- a/search/templates/search/advanced_search.html
+++ b/search/templates/search/advanced_search.html
@@ -328,7 +328,7 @@
     <div class="is-clearfix" style="height: 2.5em"> {# TODO - adjust this layout so that it matches across all forms less awkwardly #}
       <div class="is-pulled-right">
         {% if query %}
-        <a href="{{ url_with_params("ui.search", {}, {"order": query.order, "size": query.page_size}) }}">Simple Search</a>
+        <a href="{{ url_with_params("ui.search", {}, {"order": query.order, "size": query.size}) }}">Simple Search</a>
         {% else %}
         <a href="{{ url_with_params("ui.search", {}, {"order": form.order.data, "size": form.size.data }) }}">Simple Search</a>
         {% endif %}
@@ -353,7 +353,7 @@
       <div class="level-right">
         <div class="level-item">
           {% if query %}
-          <a href="{{ url_with_params("ui.search", {}, {"order": query.order, "size": query.page_size}) }}">Simple Search</a>
+          <a href="{{ url_with_params("ui.search", {}, {"order": query.order, "size": query.size}) }}">Simple Search</a>
           {% else %}
           <a href="{{ url_with_params("ui.search", {}, {"order": form.order.data, "size": form.size.data }) }}">Simple Search</a>
           {% endif %}
diff --git a/search/templates/search/search-macros.html b/search/templates/search/search-macros.html
index d4ffec14..a7b64000 100644
--- a/search/templates/search/search-macros.html
+++ b/search/templates/search/search-macros.html
@@ -34,7 +34,7 @@
 {% macro pagination(metadata, url_for_page) -%}
   <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination">
     {% if metadata.current_page > 1 %}
-    <a href="{{ url_for_page(metadata.current_page - 1, metadata.page_size) }}"
+    <a href="{{ url_for_page(metadata.current_page - 1, metadata.size) }}"
       class="pagination-previous">Previous
     </a>
     {% else %}
@@ -43,7 +43,7 @@
     </a>
     {% endif %}
     {% if metadata.current_page < metadata.total_pages %}
-      <a href="{% if metadata.current_page + 1 <= metadata.max_pages %}{{ url_for_page(metadata.current_page + 1, metadata.page_size) }}{% endif %}"
+      <a href="{% if metadata.current_page + 1 <= metadata.max_pages %}{{ url_for_page(metadata.current_page + 1, metadata.size) }}{% endif %}"
         class="pagination-next" {% if metadata.current_page + 1 > metadata.max_pages %}disabled="true"{% endif %}>Next
       </a>
     {% else %}
@@ -54,7 +54,7 @@
     <ul class="pagination-list">
 
       <li>
-        <a href="{{ url_for_page(1, metadata.page_size) }}"
+        <a href="{{ url_for_page(1, metadata.size) }}"
           class="pagination-link {% if metadata.current_page == 1 %}is-current{% endif %}"
           aria-label="Goto page 1">1
         </a>
@@ -63,7 +63,7 @@
       {% if metadata.total_pages < 8 %}
         {% for p in range(2, metadata.total_pages + 1) %}
         <li>
-          <a href="{{ url_for_page(p, metadata.page_size) }}"
+          <a href="{{ url_for_page(p, metadata.size) }}"
             class="pagination-link {% if metadata.current_page == p %}is-current{% endif %}"
             aria-label="Page {{ p }}"
             aria-current="page">{{ p }}
@@ -74,7 +74,7 @@
         {% if metadata.current_page <= 3 %}                             {# Close to the start page #}
           {% for p in range(2, 6) %}
           <li>
-            <a href="{{ url_for_page(p, metadata.page_size) }}"
+            <a href="{{ url_for_page(p, metadata.size) }}"
               class="pagination-link {% if metadata.current_page == p %}is-current{% endif %}"
               aria-label="Page {{ p }}"
               aria-current="page">{{ p }}
@@ -86,7 +86,7 @@
           <li><span class="pagination-ellipsis">&hellip;</span></li>
           {% for p in range(metadata.total_pages - 4, metadata.total_pages + 1) %}
           <li>
-            <a href="{{ url_for_page(p, metadata.page_size) }}"
+            <a href="{{ url_for_page(p, metadata.size) }}"
               class="pagination-link {% if metadata.current_page == p %}is-current{% endif %}"
               aria-label="Page {{ p }}"
               aria-current="page">{{ p }}
@@ -98,7 +98,7 @@
           {% for p in range(metadata.current_page - 1, metadata.current_page + 2) %}
           {% if p <= metadata.max_pages %}
           <li>
-            <a href="{{ url_for_page(p, metadata.page_size) }}"
+            <a href="{{ url_for_page(p, metadata.size) }}"
               class="pagination-link {% if metadata.current_page == p %}is-current{% endif %}"
               aria-label="Page {{ p }}"
               aria-current="page">{{ p }}
diff --git a/search/templates/search/search.html b/search/templates/search/search.html
index 29c3061e..2a3929b3 100644
--- a/search/templates/search/search.html
+++ b/search/templates/search/search.html
@@ -55,7 +55,7 @@
     <div class="is-clearfix" style="height: 2.5em"> {# TODO - adjust this layout so that it matches across all forms less awkwardly #}
       <div class="is-pulled-right">
         {% if query %}
-        <a href="{{ url_with_params("ui.advanced_search", {}, {"terms-0-term": query.value, "terms-0-field": query.search_field, "size": query.page_size, "order": query.order}) }}">Advanced Search</a>
+        <a href="{{ url_with_params("ui.advanced_search", {}, {"terms-0-term": query.value, "terms-0-field": query.search_field, "size": query.size, "order": query.order}) }}">Advanced Search</a>
         {% else %}
         <a href="{{ url_for('ui.advanced_search') }}">Advanced Search</a>
         {% endif %}
diff --git a/tests/integration/test_search_integration.py b/tests/integration/test_search_integration.py
index a343a4e4..ce4f9c05 100644
--- a/tests/integration/test_search_integration.py
+++ b/tests/integration/test_search_integration.py
@@ -117,7 +117,7 @@
 #         # When a user performs a search...
 #         query = SimpleQuery(
 #             order='',
-#             page_size=10,
+#             size=10,
 #             field='all',
 #             value='flux capacitor'
 #         )
@@ -138,7 +138,7 @@
 #         # A search for a TeX expression should match similar metadata strings.
 #         query = SimpleQuery(
 #             order='',
-#             page_size=10,
+#             size=10,
 #             field='all',
 #             value='λ'
 #         )
@@ -151,7 +151,7 @@
 #         """Scenario: simple search for TeX terms."""
 #         query = SimpleQuery(
 #             order='',
-#             page_size=10,
+#             size=10,
 #             field='all',
 #             value='$Z_1(4475)$'
 #         )
@@ -164,7 +164,7 @@
 #         """Scenario: simple search for TeX terms."""
 #         query = SimpleQuery(
 #             order='',
-#             page_size=10,
+#             size=10,
 #             field='all',
 #             value='$\Lambda$'
 #         )
@@ -174,7 +174,7 @@
 #
 #         query = SimpleQuery(
 #             order='',
-#             page_size=10,
+#             size=10,
 #             field='all',
 #             value='$\Lambda\Lambda$'
 #         )
@@ -187,7 +187,7 @@
 #         search_year = 2015
 #         query = AdvancedQuery(
 #             order='',
-#             page_size=10,
+#             size=10,
 #             date_range=DateRange(
 #                 start_date=datetime(year=2015, month=1, day=1, tzinfo=EASTERN),
 #                 end_date=datetime(year=2016, month=1, day=1, tzinfo=EASTERN)
@@ -207,7 +207,7 @@
 #         """Scenario: multiple terms search success."""
 #         query = AdvancedQuery(
 #             order='',
-#             page_size=10,
+#             size=10,
 #             terms=FieldedSearchList([
 #                 FieldedSearchTerm(operator='AND', field='author',
 #                                   term='schroder'),
@@ -226,7 +226,7 @@
 #         """Scenario: multiple terms search success."""
 #         query = AdvancedQuery(
 #             order='',
-#             page_size=10,
+#             size=10,
 #             include_older_versions=True,
 #             terms=FieldedSearchList([
 #                 FieldedSearchTerm(operator='AND', field='all',
@@ -246,7 +246,7 @@
 #         """Scenario: multiple terms with no results."""
 #         query = AdvancedQuery(
 #             order='',
-#             page_size=10,
+#             size=10,
 #             terms=FieldedSearchList([
 #                 FieldedSearchTerm(operator='AND', field='author',
 #                                   term='schroder'),

From e4552602bcb505d0f4dd4004fed032afcd774985 Mon Sep 17 00:00:00 2001
From: Erick <erick.peirson@gmail.com>
Date: Tue, 11 Sep 2018 09:58:13 -0400
Subject: [PATCH 05/77] Update Pipfile

---
 Pipfile | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Pipfile b/Pipfile
index b8c9c2d1..e9d658f1 100644
--- a/Pipfile
+++ b/Pipfile
@@ -50,8 +50,6 @@ typed-ast = "==1.1.0"
 werkzeug = "==0.13"
 wtforms = "==2.1"
 bleach = "*"
-"17e504d" = {path = "./../arxiv-base-ui"}
-
 
 [dev-packages]
 

From 8aedaf39b364bb9c6b27c2c32b682b140817104c Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Tue, 11 Sep 2018 13:10:04 -0400
Subject: [PATCH 06/77] ARXIVNG-1177 working on serialization

---
 Pipfile                                   |   8 +-
 Pipfile.lock                              |  67 ++-
 schema/resources/Document.json            | 488 +++++++++-------------
 schema/resources/DocumentMetadata.json    |   2 +-
 schema/resources/DocumentSet.json         |  12 -
 schema/resources/Person.json              |  49 +++
 schema/search.yaml                        |   2 +-
 search/config.py                          |  14 +
 search/controllers/api/__init__.py        |  36 +-
 search/domain/base.py                     |  27 +-
 search/routes/{api.py => api/__init__.py} |  22 +-
 search/routes/api/serialize.py            | 168 ++++++++
 search/routes/api/tests/__init__.py       |   0
 search/routes/api/tests/test_serialize.py |   3 +
 search/services/index/__init__.py         |   5 +-
 search/services/index/results.py          |  55 ++-
 search/services/index/tests/tests.py      |  12 +-
 17 files changed, 629 insertions(+), 341 deletions(-)
 create mode 100644 schema/resources/Person.json
 rename search/routes/{api.py => api/__init__.py} (53%)
 create mode 100644 search/routes/api/serialize.py
 create mode 100644 search/routes/api/tests/__init__.py
 create mode 100644 search/routes/api/tests/test_serialize.py

diff --git a/Pipfile b/Pipfile
index 0b4c7b5f..84d8a0c2 100644
--- a/Pipfile
+++ b/Pipfile
@@ -1,13 +1,10 @@
 [[source]]
-
 url = "https://pypi.python.org/simple"
 verify_ssl = true
 name = "pypi"
 
-
 [packages]
-
-arxiv-base = "==0.6.1"
+arxiv-base = "==0.11.1rc5"
 boto = "==2.48.0"
 "boto3" = "==1.6.6"
 botocore = "==1.9.6"
@@ -50,8 +47,7 @@ typed-ast = "==1.1.0"
 werkzeug = "==0.13"
 wtforms = "==2.1"
 bleach = "*"
-
+lxml = "*"
 
 [dev-packages]
-
 coveralls = "*"
diff --git a/Pipfile.lock b/Pipfile.lock
index 6b6a0b31..29767b1e 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "26b9ee6a01276378844c890e95157932326d43062f134a35f345e19371d9f678"
+            "sha256": "8f191c5bd0276ba424857b8ccd030e33606f6faa4ae022d84dbcdd1eacb1692e"
         },
         "pipfile-spec": 6,
         "requires": {},
@@ -16,18 +16,18 @@
     "default": {
         "arxiv-base": {
             "hashes": [
-                "sha256:54836ab321c9c10c2dbf1d0478120a4c4961c2eb982716d81596579277bafe5f"
+                "sha256:e8f8504a4b8ca5a190a3572882cd1cfd544b071cc2deec1e62d90635787da66f"
             ],
             "index": "pypi",
-            "version": "==0.6.1"
+            "version": "==0.11.1rc5"
         },
         "bleach": {
             "hashes": [
-                "sha256:b8fa79e91f96c2c2cd9fd1f9eda906efb1b88b483048978ba62fef680e962b34",
-                "sha256:eb7386f632349d10d9ce9d4a838b134d4731571851149f9cc2c05a9a837a9a44"
+                "sha256:0ee95f6167129859c5dce9b1ca291ebdb5d8cd7e382ca0e237dfd0dad63f63d8",
+                "sha256:24754b9a7d530bf30ce7cbc805bc6cce785660b4a10ff3a43633728438c105ab"
             ],
             "index": "pypi",
-            "version": "==2.1.3"
+            "version": "==2.1.4"
         },
         "boto": {
             "hashes": [
@@ -226,6 +226,42 @@
             "index": "pypi",
             "version": "==2.6.0"
         },
+        "lxml": {
+            "hashes": [
+                "sha256:02bc220d61f46e9b9d5a53c361ef95e9f5e1d27171cd461dddb17677ae2289a5",
+                "sha256:22f253b542a342755f6cfc047fe4d3a296515cf9b542bc6e261af45a80b8caf6",
+                "sha256:2f31145c7ff665b330919bfa44aacd3a0211a76ca7e7b441039d2a0b0451e415",
+                "sha256:36720698c29e7a9626a0dc802ef8885f8f0239bfd1689628ecd459a061f2807f",
+                "sha256:438a1b0203545521f6616132bfe0f4bca86f8a401364008b30e2b26ec408ce85",
+                "sha256:4815892904c336bbaf73dafd54f45f69f4021c22b5bad7332176bbf4fb830568",
+                "sha256:5be031b0f15ad63910d8e5038b489d95a79929513b3634ad4babf77100602588",
+                "sha256:5c93ae37c3c588e829b037fdfbd64a6e40c901d3f93f7beed6d724c44829a3ad",
+                "sha256:60842230678674cdac4a1cf0f707ef12d75b9a4fc4a565add4f710b5fcf185d5",
+                "sha256:62939a8bb6758d1bf923aa1c13f0bcfa9bf5b2fc0f5fa917a6e25db5fe0cfa4e",
+                "sha256:75830c06a62fe7b8fe3bbb5f269f0b308f19f3949ac81cfd40062f47c1455faf",
+                "sha256:81992565b74332c7c1aff6a913a3e906771aa81c9d0c68c68113cffcae45bc53",
+                "sha256:8c892fb0ee52c594d9a7751c7d7356056a9682674b92cc1c4dc968ff0f30c52f",
+                "sha256:9d862e3cf4fc1f2837dedce9c42269c8c76d027e49820a548ac89fdcee1e361f",
+                "sha256:a623965c086a6e91bb703d4da62dabe59fe88888e82c4117d544e11fd74835d6",
+                "sha256:a7783ab7f6a508b0510490cef9f857b763d796ba7476d9703f89722928d1e113",
+                "sha256:aab09fbe8abfa3b9ce62aaf45aca2d28726b1b9ee44871dbe644050a2fff4940",
+                "sha256:abf181934ac3ef193832fb973fd7f6149b5c531903c2ec0f1220941d73eee601",
+                "sha256:ae07fa0c115733fce1e9da96a3ac3fa24801742ca17e917e0c79d63a01eeb843",
+                "sha256:b9c78242219f674ab645ec571c9a95d70f381319a23911941cd2358a8e0521cf",
+                "sha256:bccb267678b870d9782c3b44d0cefe3ba0e329f9af8c946d32bf3778e7a4f271",
+                "sha256:c4df4d27f4c93b2cef74579f00b1d3a31a929c7d8023f870c4b476f03a274db4",
+                "sha256:caf0e50b546bb60dfa99bb18dfa6748458a83131ecdceaf5c071d74907e7e78a",
+                "sha256:d3266bd3ac59ac4edcd5fa75165dee80b94a3e5c91049df5f7c057ccf097551c",
+                "sha256:db0d213987bcd4e6d41710fb4532b22315b0d8fb439ff901782234456556aed1",
+                "sha256:dbbd5cf7690a40a9f0a9325ab480d0fccf46d16b378eefc08e195d84299bfae1",
+                "sha256:e16e07a0ec3a75b5ee61f2b1003c35696738f937dc8148fbda9fe2147ccb6e61",
+                "sha256:e175a006725c7faadbe69e791877d09936c0ef2cf49d01b60a6c1efcb0e8be6f",
+                "sha256:edd9c13a97f6550f9da2236126bb51c092b3b1ce6187f2bd966533ad794bbb5e",
+                "sha256:fa39ea60d527fbdd94215b5e5552f1c6a912624521093f1384a491a8ad89ad8b"
+            ],
+            "index": "pypi",
+            "version": "==4.2.5"
+        },
         "markupsafe": {
             "hashes": [
                 "sha256:a6be69091dac236ea9c6bc7d012beab42010fa914c459791d627dad4910eb665"
@@ -289,8 +325,6 @@
         },
         "pycodestyle": {
             "hashes": [
-                "sha256:1ec08a51c901dfe44921576ed6e4c1f5b7ecbad403f871397feedb5eb8e4fa14",
-                "sha256:5ff2fbcbab997895ba9ead77e1b38b3ebc2e5c3b8a6194ef918666e4c790a00e",
                 "sha256:682256a5b318149ca0d2a9185d365d8864a768a28db66a84a2ea946bcc426766",
                 "sha256:6c4245ade1edfad79c3446fadfc96b0de2759662dc29d07d80a6f27ad1ca6ba9"
             ],
@@ -394,6 +428,8 @@
         "typed-ast": {
             "hashes": [
                 "sha256:0948004fa228ae071054f5208840a1e88747a357ec1101c17217bfe99b299d58",
+                "sha256:10703d3cec8dcd9eef5a630a04056bbc898abc19bac5691612acba7d1325b66d",
+                "sha256:1f6c4bd0bdc0f14246fd41262df7dfc018d65bb05f6e16390b7ea26ca454a291",
                 "sha256:25d8feefe27eb0303b73545416b13d108c6067b846b543738a25ff304824ed9a",
                 "sha256:29464a177d56e4e055b5f7b629935af7f49c196be47528cc94e0a7bf83fbc2b9",
                 "sha256:2e214b72168ea0275efd6c884b114ab42e316de3ffa125b267e732ed2abda892",
@@ -405,6 +441,9 @@
                 "sha256:6de012d2b166fe7a4cdf505eee3aaa12192f7ba365beeefaca4ec10e31241a85",
                 "sha256:79b91ebe5a28d349b6d0d323023350133e927b4de5b651a8aa2db69c761420c6",
                 "sha256:8550177fa5d4c1f09b5e5f524411c44633c80ec69b24e0e98906dd761941ca46",
+                "sha256:898f818399cafcdb93cbbe15fc83a33d05f18e29fb498ddc09b0214cdfc7cd51",
+                "sha256:94b091dc0f19291adcb279a108f5d38de2430411068b219f41b343c03b28fb1f",
+                "sha256:a26863198902cda15ab4503991e8cf1ca874219e0118cbf07c126bce7c4db129",
                 "sha256:a8034021801bc0440f2e027c354b4eafd95891b573e12ff0418dec385c76785c",
                 "sha256:bc978ac17468fe868ee589c795d06777f75496b1ed576d308002c8a5756fb9ea",
                 "sha256:c05b41bc1deade9f90ddc5d988fe506208019ebba9f2578c622516fd201f5863",
@@ -423,6 +462,12 @@
             "index": "pypi",
             "version": "==1.22"
         },
+        "uwsgi": {
+            "hashes": [
+                "sha256:d2318235c74665a60021a4fc7770e9c2756f9fc07de7b8c22805efe85b5ab277"
+            ],
+            "version": "==2.0.17.1"
+        },
         "webencodings": {
             "hashes": [
                 "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78",
@@ -511,11 +556,11 @@
         },
         "coveralls": {
             "hashes": [
-                "sha256:32569a43c9dbc13fa8199247580a4ab182ef439f51f65bb7f8316d377a1340e8",
-                "sha256:664794748d2e5673e347ec476159a9d87f43e0d2d44950e98ed0e27b98da8346"
+                "sha256:9dee67e78ec17b36c52b778247762851c8e19a893c9a14e921a2fc37f05fac22",
+                "sha256:aec5a1f5e34224b9089664a1b62217732381c7de361b6ed1b3c394d7187b352a"
             ],
             "index": "pypi",
-            "version": "==1.3.0"
+            "version": "==1.5.0"
         },
         "docopt": {
             "hashes": [
diff --git a/schema/resources/Document.json b/schema/resources/Document.json
index d2327220..d67f672d 100644
--- a/schema/resources/Document.json
+++ b/schema/resources/Document.json
@@ -1,297 +1,221 @@
 {
-    "$schema": "http://json-schema.org/schema#",
-    "title": "Document",
-    "description": "Schema for arXiv document metadata returned by the search API.",
-    "definitions": {
-        "category": {
-            "type": "object",
-            "properties": {
-                "id": {
-                    "type": "string"
-                },
-                "name": {
-                    "type": "string"
-                }
-            },
-            "required": ["id", "name"]
-        },
-        "archive": {
-            "type": "object",
-            "properties": {
-                "id": {
-                    "type": "string"
-                },
-                "name": {
-                    "type": "string"
-                }
-            },
-            "required": ["id", "name"]
-        },
-        "group": {
-            "type": "object",
-            "properties": {
-                "id": {
-                    "type": "string"
-                },
-                "name": {
-                    "type": "string"
-                }
-            },
-            "required": ["id", "name"]
+  "$schema": "http://json-schema.org/schema#",
+  "title": "Document",
+  "description": "Schema for arXiv document metadata returned by the search API.",
+  "definitions": {
+    "category": {
+      "type": "object",
+      "properties": {
+        "id": {
+          "type": "string"
         },
-        "classification": {
-            "type": "object",
-            "properties": {
-                "archive": {"$ref": "#/definitions/archive"},
-                "group": {"$ref": "#/definitions/group"},
-                "category": {"$ref": "#/definitions/category"}
-            }
+        "name": {
+          "type": "string"
         }
+      },
+      "required": ["id", "name"]
     },
-    "type": "object",
-    "properties": {
-        "abs_categories": {
-            "description": "Categories as they would appear on the /abs page",
-            "type": "string"
-        },
-        "abstract": {
-            "type": "string"
-        },
-        "abstract_utf8": {
-            "description": "abstract field with texisms converted to utf-8 equivalents",
-            "type": "string"
-        },
-        "acm_class": {
-            "description": "Classifications from ACM Computing Classification System",
-            "type": "array",
-            "items": {
-                "type": "string"
-            },
-            "uniqueItems": true
-        },
-        "owners": {
-            "items": {
-                "type": "object",
-                "properties": {
-                    "first_name": {
-                        "type": "string"
-                    },
-                    "last_name": {
-                        "type": "string"
-                    },
-                    "suffix": {
-                        "type": "string"
-                    },
-                    "affiliation": {
-                        "type": "array",
-                        "items": {
-                            "type": "string"
-                        },
-                        "minItems": 0
-                    },
-                    "orcid": {
-                        "type": "string"
-                    }
-                },
-                "required": ["last_name"]
-            }
-        },
-        "authors": {
-            "items": {
-                "type": "object",
-                "properties": {
-                    "first_name": {
-                        "type": "string"
-                    },
-                    "last_name": {
-                        "type": "string"
-                    },
-                    "suffix": {
-                        "type": "string"
-                    },
-                    "author_id": {
-                        "type": "string"
-                    },
-                    "orcid": {
-                        "type": "string"
-                    },
-                    "affiliation": {
-                        "type": "array",
-                        "items": {
-                            "type": "string"
-                        },
-                        "minItems": 0
-                    }
-                },
-                "required": ["last_name"]
-            },
-            "minItems": 1,
-            "type": "array"
-        },
-        "comments": {
-            "type": ["string"]
-        },
-        "comments_utf8": {
-            "description": "comments field with texisms converted to utf-8 equivalents",
-            "type": ["string"]
-        },
-        "submitted_date": {
-            "description": "Date this version of paper was submitted",
-            "type": "string"
-        },
-        "submitted_date_all": {
-            "description": "Submitted dates of all of the versions of this paper",
-            "type": "array",
-            "items": {
-                "type": "string"
-            },
-            "minItems": 0
-        },
-        "announced_date_first": {
-          "desription": "Date (year and month) the paper was originally announced",
+    "archive": {
+      "type": "object",
+      "properties": {
+        "id": {
           "type": "string"
         },
-        "modified_date": {
-            "description": "Date this version of paper .abs file was last modified",
-            "type": "string"
-        },
-        "paper_id": {
-            "description": "Version-independent arXiv paper identifier.",
-            "type": "string"
-        },
-        "paper_id_v": {
-            "description": "arXiv paper identifier with version affix.",
-            "type": "string"
-        },
-        "doi": {
-            "type": "string"
-        },
-        "formats": {
-            "description": "Derivative paper formats available to users",
-            "type": "array",
-            "minItems": 0
-        },
-        "fulltext": {
-            "type": "string"
-        },
-        "is_current": {
-            "type": "boolean"
-        },
-        "is_withdrawn": {
-            "type": "boolean"
-        },
-        "journal_ref": {
-            "type": ["string"]
-        },
-        "journal_ref_utf8": {
-            "description": "journal_ref field with texisms converted to utf-8 equivalents",
-            "type": ["string"]
-        },
-        "license": {
-            "type": "object",
-            "properties": {
-                "uri": {
-                    "type": ["string", "null"]
-                },
-                "label": {
-                    "type": ["string", "null"]
-                }
-            }
-        },
-        "metadata_id": {
-            "type": "integer"
-        },
-        "msc_class": {
-            "description": "Classifications from American Mathematical Society Mathematical Subject Classification (MSC)",
-            "type": "array",
-            "items": {
-                "type": "string"
-            },
-            "uniqueItems": true
-        },
-        "primary_classification": {
-            "$ref": "#/definitions/classification"
-        },
-        "secondary_classification": {
-            "type": "array",
-            "items": {"$ref": "#/definitions/classification"}
-        },
-        "proxy": {
-            "type": "boolean"
-        },
-        "report_num": {
-            "type": "string"
-        },
-        "source": {
-            "properties": {
-                "flags": {
-                    "type": ["string", "null"]
-                },
-                "format": {
-                    "type": ["string", "null"]
-                },
-                "size_bytes": {
-                    "minimum": 0,
-                    "type": "integer"
-                }
-            },
-            "required": ["size_bytes"]
-        },
-        "submitter": {
-            "description": "Submitter data. Name may not match those associated with user account, since user data is copied to submission data at the time of submission creation.",
-            "type": "object",
-            "properties": {
-                "name": {
-                    "type": "string"
-                },
-                "is_author": {
-                    "type": "boolean"
-                },
-                "author_id": {
-                    "type:": "string"
-                },
-                "orcid": {
-                    "type": "string"
-                }
-            },
-            "required": ["name"]
+        "name": {
+          "type": "string"
+        }
+      },
+      "required": ["id", "name"]
+    },
+    "group": {
+      "type": "object",
+      "properties": {
+        "id": {
+          "type": "string"
         },
-        "title": {
+        "name": {
+          "type": "string"
+        }
+      },
+      "required": ["id", "name"]
+    },
+    "classification": {
+      "type": "object",
+      "properties": {
+        "archive": {"$ref": "#/definitions/archive"},
+        "group": {"$ref": "#/definitions/group"},
+        "category": {"$ref": "#/definitions/category"}
+      }
+    }
+  },
+  "type": "object",
+  "properties": {
+    "abs_categories": {
+      "description": "Categories as they would appear on the /abs page",
+      "type": "string"
+    },
+    "abstract": {
+      "type": "string"
+    },
+    "acm_class": {
+      "description": "Classifications from ACM Computing Classification System",
+      "type": "array",
+      "items": {
+        "type": "string"
+      },
+      "uniqueItems": true
+    },
+    "owners": {
+      "items": {
+        "type": "object",
+        "$ref": "Person.json#Person"
+      }
+    },
+    "authors": {
+      "items": {
+        "type": "object",
+        "$ref": "Person.json#Person"
+      },
+      "minItems": 1,
+      "type": "array"
+    },
+    "comments": {
+      "type": ["string"]
+    },
+    "submitted_date": {
+      "description": "Date this version of paper was submitted.",
+      "type": "string",
+      "format": "date-time"
+    },
+    "announced_date_first": {
+      "description": "Year and month (``%Y-%m``) the paper was originally announced.",
+      "type": "string"
+    },
+    "paper_id": {
+      "description": "arXiv paper identifier with version affix.",
+      "type": "string"
+    },
+    "doi": {
+      "type": "string"
+    },
+    "formats": {
+      "description": "Derivative paper formats available to users",
+      "type": "array",
+      "minItems": 0,
+      "items": {
+        "type": "object",
+        "properties": {
+          "format": {
             "type": "string"
+          },
+          "href": {
+            "type": "string",
+            "format": "uri"
+          }
+        }
+      }
+    },
+    "fulltext": {
+      "type": "string"
+    },
+    "is_current": {
+      "type": "boolean"
+    },
+    "is_withdrawn": {
+      "type": "boolean"
+    },
+    "journal_ref": {
+      "type": ["string"]
+    },
+    "license": {
+      "type": "object",
+      "properties": {
+        "href": {
+          "type": ["string", "null"]
+        },
+        "label": {
+          "type": ["string", "null"]
+        }
+      }
+    },
+    "msc_class": {
+      "description": "Classifications from American Mathematical Society Mathematical Subject Classification (MSC)",
+      "type": "array",
+      "items": {
+        "type": "string"
+      },
+      "uniqueItems": true
+    },
+    "primary_classification": {
+      "$ref": "#/definitions/classification"
+    },
+    "secondary_classification": {
+      "type": "array",
+      "items": {"$ref": "#/definitions/classification"}
+    },
+    "report_num": {
+      "type": "string"
+    },
+    "source": {
+      "properties": {
+        "flags": {
+          "type": ["string", "null"]
         },
-        "title_utf8": {
-            "description": "Title field with texisms converted to utf-8 equivalents",
-            "type": "string"
+        "format": {
+          "type": ["string", "null"]
         },
+        "size_bytes": {
+          "minimum": 0,
+          "type": "integer"
+        }
+      },
+      "required": ["size_bytes"]
+    },
+    "submitter": {
+      "description": "Submitter data. Name may not match those associated with user account, since user data is copied to submission data at the time of submission creation.",
+      "type": "object",
+      "$ref": "Person.json#Person"
+    },
+    "title": {
+      "type": "string"
+    },
+    "version": {
+      "description": "The version number for this paper.",
+      "minimum": 1,
+      "type": "integer"
+    },
+    "latest": {
+      "description": "Version number, URI, and arXiv paper identifier (with version affix) of the latest version of this paper.",
+      "type": "object",
+      "properties": {
         "version": {
-            "description": "The version number for this paper.",
-            "minimum": 1,
-            "type": "integer"
+          "description": "Number of the latest version of this paper.",
+          "minimum": 1,
+          "type": "integer"
         },
-        "latest_version": {
-            "description": "Number of the latest version of this paper.",
-            "minimum": 1,
-            "type": "integer"
+        "href": {
+          "type": "string",
+          "format": "uri"
         },
-        "latest": {
-            "description": "arXiv paper identifier (with version affix) of latest version of this paper.",
-            "type": "string"
+        "paper_id": {
+          "description": "Paper ID with version affix of latest version.",
+          "type": "string"
         }
-    },
-    "required": [
-        "abstract",
-        "authors",
-        "submitted_date",
-        "submitted_date_latest",
-        "submitted_date_first",
-        "is_current",
-        "is_withdrawn",
-        "license",
-        "paper_id",
-        "paper_id_v",
-        "primary_classification",
-        "title",
-        "source",
-        "version"
-    ]
+      }
+    }
+  },
+  "required": [
+    "abstract",
+    "authors",
+    "submitted_date",
+    "is_current",
+    "is_withdrawn",
+    "license",
+    "paper_id",
+    "paper_id_v",
+    "primary_classification",
+    "title",
+    "source",
+    "version"
+  ]
 }
diff --git a/schema/resources/DocumentMetadata.json b/schema/resources/DocumentMetadata.json
index 84b791f5..ee7f1203 100644
--- a/schema/resources/DocumentMetadata.json
+++ b/schema/resources/DocumentMetadata.json
@@ -150,7 +150,7 @@
         "announced_first": {
           "desription": "Date (year and month) the paper was originally announced",
           "type": "string"
-        }
+        },
         "updated_date": {
             "description": "Date this version of paper was last touched",
             "type": "string"
diff --git a/schema/resources/DocumentSet.json b/schema/resources/DocumentSet.json
index 7cb584a8..9c894a6d 100644
--- a/schema/resources/DocumentSet.json
+++ b/schema/resources/DocumentSet.json
@@ -19,18 +19,6 @@
         "total": {
           "description": "Total number of documents that respond to this query.",
           "type": "integer"
-        },
-        "current_page": {
-          "description": "Estimated page number, based on start and size parameters.",
-          "type": "integer"
-        },
-        "max_pages": {
-          "description": "Estimated last page in the entire search results, based on size and total parameters.",
-          "type": "integer"
-        },
-        "total_pages": {
-          "description": "Estimated number of pages in the entire search results, based on size and total parameters.",
-          "type": "integer"
         }
       }
     },
diff --git a/schema/resources/Person.json b/schema/resources/Person.json
new file mode 100644
index 00000000..3b386c00
--- /dev/null
+++ b/schema/resources/Person.json
@@ -0,0 +1,49 @@
+{
+  "$schema": "http://json-schema.org/schema#",
+  "title": "Person",
+  "description": "Schema for person in metadata returned by the search API.",
+  "type": "object",
+  "properties": {
+    "full_name": {
+      "description": "The fullest representation of the person's name available in arXiv metadata.",
+      "type": "string"
+    },
+    "last_name": {
+      "description": "The family or surname part of the person's name, if available.",
+      "type": "string"
+    },
+    "first_name": {
+      "description": "The personal or forename part of the person's name, if available.",
+      "type": "string"
+    },
+    "suffix": {
+      "description": "The suffix part of the person's name, if available.",
+      "type": "string"
+    },
+    "affiliation": {
+      "description": "Institutional affiliations as entered at the time of submission, if available.",
+      "type": "array",
+      "items": {
+        "type": "string"
+      },
+      "minItems": 0
+    },
+    "orcid": {
+      "description": "ORCID identifier, if available.",
+      "oneOf": [
+        {"type": "string"},
+        {"type": "null"}
+      ]
+    },
+    "author_id": {
+      "description": "arXiv author identifier, if available.",
+      "oneOf": [
+        {"type": "string"},
+        {"type": "null"}
+      ]
+    }
+  },
+  "required": [
+    "full_name"
+  ]
+}
diff --git a/schema/search.yaml b/schema/search.yaml
index d676caf3..6c1f49ba 100644
--- a/schema/search.yaml
+++ b/schema/search.yaml
@@ -2,7 +2,7 @@ openapi: "3.0.0"
 info:
   version: "0.1"
   title: "arXiv Search API"
-  description: "A RESTful API for arXiv documents."
+  description: "A RESTful API for arXiv metadata."
   termsOfService: "https://arxiv.org/help/general",
   contact:
     name: "arXiv API Team"
diff --git a/search/config.py b/search/config.py
index cf287772..d223026e 100644
--- a/search/config.py
+++ b/search/config.py
@@ -228,5 +228,19 @@
 RELEASE_NOTES_TEXT = 'Search v0.4 released 2018-07-18'
 
 
+EXTERNAL_URL_SCHEME = os.environ.get('EXTERNAL_URL_SCHEME', 'https')
+BASE_SERVER = os.environ.get('BASE_SERVER', 'arxiv.org')
+
+URLS = [
+    ("pdf", "/pdf/<arxiv:paper_id>v<string:version>", BASE_SERVER),
+    ("pdfonly", "/pdf/<arxiv:paper_id>v<string:version>", BASE_SERVER),
+    ("dvi", "/dvi/<arxiv:paper_id>v<string:version>", BASE_SERVER),
+    ("html", "/html/<arxiv:paper_id>v<string:version>", BASE_SERVER),
+    ("ps", "/ps/<arxiv:paper_id>v<string:version>", BASE_SERVER),
+    ("source", "/e-print/<arxiv:paper_id>v<string:version>", BASE_SERVER),
+    ("other", "/format/<arxiv:paper_id>v<string:version>", BASE_SERVER),
+]
+
+
 # TODO: one place to set the version, update release notes text, JIRA issue
 # collector, etc.
diff --git a/search/controllers/api/__init__.py b/search/controllers/api/__init__.py
index 851032f7..f1a6133a 100644
--- a/search/controllers/api/__init__.py
+++ b/search/controllers/api/__init__.py
@@ -14,12 +14,14 @@
 from flask import url_for
 
 from arxiv import status, taxonomy
+from arxiv.base import logging
 
 from search.services import index, fulltext, metadata
 from search.controllers.util import paginate
 from ...domain import Query, APIQuery, FieldedSearchList, FieldedSearchTerm, \
     DateRange, ClassificationList, Classification, asdict
 
+logger = logging.getLogger(__name__)
 Response = Tuple[Dict[str, Any], int, Dict[str, Any]]
 EASTERN = timezone('US/Eastern')
 
@@ -55,7 +57,39 @@ def search(params: MultiDict) -> Response:
         q.primary_classification = classifications
 
     q = paginate(q, params)
-    return asdict(index.search(q, highlight=False)), status.HTTP_200_OK, {}
+    return index.search(q, highlight=False), status.HTTP_200_OK, {}
+
+
+def paper(paper_id: str) -> Response:
+    """
+    Handle a request for paper metadata from the API.
+
+    Parameters
+    ----------
+    paper_id : str
+        arXiv paper ID for the requested paper.
+
+    Returns
+    -------
+    dict
+        Response data (to serialize).
+    int
+        HTTP status code.
+    dict
+        Extra headers for the response.
+
+    Raises
+    ------
+    :class:`NotFound`
+        Raised when there is no document with the provided paper ID.
+
+    """
+    try:
+        document = index.get_document(paper_id)
+    except index.DocumentNotFound as e:
+        logger.error('Document not found')
+        raise NotFound('No such document') from e
+    return document, status.HTTP_200_OK, {}
 
 
 def _get_fielded_terms(params: MultiDict) -> Optional[FieldedSearchList]:
diff --git a/search/domain/base.py b/search/domain/base.py
index 91dc0596..cf74b4cd 100644
--- a/search/domain/base.py
+++ b/search/domain/base.py
@@ -17,6 +17,25 @@ def asdict(obj: Any) -> dict:
     return {key: value for key, value in _asdict(obj).items()}
 
 
+@dataclass
+class Person:
+    """Represents an author, owner, or other person in metadata."""
+
+    full_name: str
+    last_name: str = field(default_factory=str)
+    first_name: str = field(default_factory=str)
+    suffix: str = field(default_factory=str)
+
+    affiliation: List[str] = field(default_factory=list)
+    """Institutional affiliations."""
+
+    orcid: Optional[str] = field(default=None)
+    """ORCID identifier."""
+
+    author_id: Optional[str] = field(default=None)
+    """Legacy arXiv author identifier."""
+
+
 @dataclass
 class DocMeta:
     """Metadata for an arXiv paper, retrieved from the core repository."""
@@ -102,14 +121,14 @@ def __str__(self) -> str:
 class Classification:
     """Represents an arXiv classification for a paper."""
 
-    group: Optional[str] = None
-    archive: Optional[str] = None
-    category: Optional[str] = None
+    group: Optional[dict] = None
+    archive: Optional[dict] = None
+    category: Optional[dict] = None
 
     def __str__(self) -> str:
         """Build a string representation, for use in rendering."""
         return ":".join(
-            [p for p in [self.group, self.archive, self.category] if p]
+            [p['id'] for p in [self.group, self.archive, self.category] if p]
         )
 
 
diff --git a/search/routes/api.py b/search/routes/api/__init__.py
similarity index 53%
rename from search/routes/api.py
rename to search/routes/api/__init__.py
index 86e1f7ae..8cfdd3ec 100644
--- a/search/routes/api.py
+++ b/search/routes/api/__init__.py
@@ -1,3 +1,5 @@
+"""Provides routing blueprint for the search API."""
+
 import json
 from typing import Dict, Callable, Union, Any, Optional, List
 from functools import wraps
@@ -14,6 +16,8 @@
 from werkzeug.exceptions import InternalServerError
 from search.controllers import api
 
+from . import serialize
+
 # from arxiv.users.auth.decorators import scoped
 # from arxiv.users.auth import scopes
 
@@ -21,9 +25,23 @@
 
 blueprint = Blueprint('api', __name__, url_prefix='/')
 
+ATOM_XML = "application/atom+xml"
+JSON = "application/json"
+
 
 # @scoped(scopes.READ_API)
 @blueprint.route('/', methods=['GET'])
 def search() -> Response:
-    data, status, headers = api.search(request.args)
-    return jsonify(data), status, headers
+    """Main query endpoint."""
+    data, status_code, headers = api.search(request.args)
+    # requested = request.accept_mimetypes.best_match([JSON, ATOM_XML])
+    # if requested == ATOM_XML:
+    #     return serialize.as_atom(data), status, headers
+    return serialize.as_json(data), status_code, headers
+
+
+@blueprint.route('<arxiv:paper_id>v<string:version>', methods=['GET'])
+def paper(paper_id: str, version: str) -> Response:
+    """Document metadata endpoint."""
+    data, status_code, headers = api.paper(f'{paper_id}v{version}')
+    return serialize.as_json(data), status_code, headers
diff --git a/search/routes/api/serialize.py b/search/routes/api/serialize.py
new file mode 100644
index 00000000..53043929
--- /dev/null
+++ b/search/routes/api/serialize.py
@@ -0,0 +1,168 @@
+"""Serializers for API responses."""
+
+from typing import Union
+from lxml import etree
+from flask import jsonify, url_for
+
+from arxiv import status
+from search.domain import DocumentSet, Document, Classification, Person
+
+
+class BaseSerializer(object):
+    """Base class for API serializers."""
+
+    # def __init__(cls, document_set: DocumentSet) -> None:
+    #     """Initialize with a :class:`DocumentSet`."""
+        # cls._document_set = document_set
+
+
+# class AtomXMLSerializer(BaseSerializer):
+#     """."""
+#
+#     ATOM = "http://www.w3.org/2005/Atom"
+#     OPENSEARCH = "http://a9.com/-/spec/opensearch/1.1/"
+#     ARXIV = "http://arxiv.org/schemas/atom"
+#     NSMAP = {
+#         None: ATOM,
+#         "opensearch": OPENSEARCH,
+#         "arxiv": ARXIV
+#     }
+#     fields = {
+#         'title': '{%s}title' % ATOM,
+#         'id': '{%s}id' % ATOM,
+#         'submitted_date': '{%s}published' % ATOM,
+#         'modified_date': '{%s}updated' % ATOM,
+#         'abstract': '{%s}summary' % ATOM,
+#         ''
+#     }
+#
+#     def __init__(cls, *args, **kwargs) -> None:
+#         super(AtomXMLSerializer, cls).__init__(*args, **kwargs)
+#         cls._root = etree.Element('feed', nsmap=cls.NSMAP)
+#
+#     def transform(cls):
+#         for document in cls.iter_documents():
+#
+#
+#
+#     def __repr__(cls) -> str:
+#         return etree.tostring(cls._root, pretty_print=True)
+
+
+class JSONSerializer(BaseSerializer):
+    """Serializes a :class:`DocumentSet` as JSON."""
+
+    @classmethod
+    def _transform_person(cls, person: Person) -> dict:
+        return {
+            'first_name': person.first_name,
+            'last_name': person.last_name,
+            'suffix': person.suffix,
+            'affiliation': person.affiliation,
+            'orcid': person.orcid,
+            'author_id': person.author_id,
+            'full_name': person.full_name,
+        }
+
+    @classmethod
+    def _transform_classification(cls, clsn: Classification) -> dict:
+        return {
+            'group': clsn.group,
+            'archive': clsn.archive,
+            'category': clsn.category
+        }
+
+    @classmethod
+    def _transform_format(cls, fmt: str, paper_id: str, version: int) -> dict:
+        return {
+            "format": fmt,
+            "href": url_for(fmt, paper_id=paper_id, version=version)
+        }
+
+    @classmethod
+    def _transform_latest(cls, document: Document) -> dict:
+        return {
+            "paper_id": document.latest,
+            "href": url_for("api.paper", paper_id=document.paper_id,
+                            version=document.latest_version,
+                            _external=True),
+            "version": document.latest_version
+        }
+
+    @classmethod
+    def _transform_license(cls, license: dict) -> dict:
+        return {
+            'label': license['label'],
+            'href': license['uri']
+        }
+
+    @classmethod
+    def transform_document(cls, doc: Document) -> dict:
+        """Select a subset of :class:`Document` properties for public API."""
+        return {
+            'abs_categories': doc.abs_categories,
+            'abstract': doc.abstract,
+            'acm_class': doc.acm_class,
+            'owners': [
+                cls._transform_person(owner) for owner in doc.owners
+            ],
+            'authors': [
+                cls._transform_person(author) for author in doc.authors
+            ],
+            'comments': doc.comments,
+            'submitted_date': doc.submitted_date,
+            'submitted_date_first': doc.submitted_date_first,
+            'announced_date_first': doc.announced_date_first.strftime('%Y-%m'),
+            'paper_id': doc.paper_id_v,
+            'doi': doc.doi,
+            'formats': [
+                cls._transform_format(fmt, doc.paper_id, doc.version)
+                for fmt in doc.formats
+            ],
+            'is_current': doc.is_current,
+            'is_withdrawn': doc.is_withdrawn,
+            'journal_ref': doc.journal_ref,
+            'license': cls._transform_license(doc.license),
+            'msc_class': doc.msc_class,
+            'primary_classification': cls._transform_classification(
+                doc.primary_classification
+            ),
+            'secondary_classification': [
+                cls._transform_classification(clsn)
+                for clsn in doc.secondary_classification
+            ],
+            'report_num': doc.report_num,
+            'source': doc.source,  # TODO: link?
+            'submitter': cls._transform_person(doc.submitter),
+            'title': doc.title,
+            'version': doc.version,
+            'latest': cls._transform_latest(doc),
+        }
+
+    @classmethod
+    def serialize(cls, document_set: DocumentSet) -> str:
+        """Generate JSON for a :class:`DocumentSet`."""
+        return jsonify({
+            'results': [
+                cls.transform_document(doc)
+                for doc in document_set.results
+            ],
+            'metadata': {
+                'start': document_set.metadata.get('start'),
+                'end': document_set.metadata.get('end'),
+                'offset': document_set.metadata.get('offset'),
+                'total': document_set.metadata.get('total'),
+            },
+        })
+
+    @classmethod
+    def serialize_document(cls, document: Document) -> str:
+        """Generate JSON for a single :class:`Document`."""
+        return jsonify(cls.transform_document(document))
+
+
+def as_json(document_or_document_set: Union[DocumentSet, Document]) -> str:
+    """Serialize a :class:`DocumentSet` or :class:`Document` as JSON."""
+    if type(document_or_document_set) is DocumentSet:
+        return JSONSerializer.serialize(document_or_document_set)
+    return JSONSerializer.serialize_document(document_or_document_set)
diff --git a/search/routes/api/tests/__init__.py b/search/routes/api/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/search/routes/api/tests/test_serialize.py b/search/routes/api/tests/test_serialize.py
new file mode 100644
index 00000000..39af719b
--- /dev/null
+++ b/search/routes/api/tests/test_serialize.py
@@ -0,0 +1,3 @@
+from unittest import TestCase
+
+from ....domain import DocumentSet, Document
diff --git a/search/services/index/__init__.py b/search/services/index/__init__.py
index 84d9f0d5..3d355832 100644
--- a/search/services/index/__init__.py
+++ b/search/services/index/__init__.py
@@ -75,6 +75,9 @@ def handle_es_exceptions() -> Generator:
         elif e.error == 'parsing_exception':
             logger.error('ES parsing_exception: %s', e.info)
             raise QueryError(e.info) from e
+        elif e.status_code == 404:
+            logger.error('Caught NotFoundError: %s', e)
+            raise DocumentNotFound('No such document')
         logger.error('Problem communicating with ES: %s' % e.error)
         raise IndexConnectionError(
             'Problem communicating with ES: %s' % e.error
@@ -357,7 +360,7 @@ def get_document(self, document_id: int) -> Document:
         if not record:
             logger.error("No such document: %s", document_id)
             raise DocumentNotFound('No such document')
-        return Document(**record['_source'])    # type: ignore
+        return results.to_document(record['_source'], highlight=False)
         # See https://github.com/python/mypy/issues/3937
 
     def search(self, query: Query, highlight: bool = True) -> DocumentSet:
diff --git a/search/services/index/results.py b/search/services/index/results.py
index fe188564..a67c9215 100644
--- a/search/services/index/results.py
+++ b/search/services/index/results.py
@@ -7,11 +7,11 @@
 import re
 from datetime import datetime
 from math import floor
-from typing import Any, Dict
+from typing import Any, Dict, Union
 
 from elasticsearch_dsl.response import Response
 from elasticsearch_dsl.utils import AttrList, AttrDict
-from search.domain import Document, Query, DocumentSet
+from search.domain import Document, Query, DocumentSet, Classification, Person
 from arxiv.base import logging
 
 from .util import MAX_RESULTS, TEXISM
@@ -21,22 +21,36 @@
 logger.propagate = False
 
 
-def _to_author(author_data: dict) -> dict:
+def _to_author(author_data: dict) -> Person:
     """Prevent e-mail, other extraneous data, from escaping."""
-    return {k: v for k, v in author_data.items() if k != 'email'}
+    data = {}
+    for key, value in author_data.items():
+        if key == 'email':
+            continue
+        elif key == 'name':
+            key = 'full_name'
+        if key not in Person.__dataclass_fields__:
+            continue
+        data[key] = value
+    return Person(**data)
 
 
-def _to_document(raw: Response, highlight: bool = True) -> Document:
+def to_document(raw: Union[Response, dict], highlight: bool = True) \
+        -> Document:
     """Transform an ES search result back into a :class:`.Document`."""
     # typing: ignore
     result: Dict[str, Any] = {}
 
     result['match'] = {}  # Hit on field, but no highlighting.
     result['truncated'] = {}    # Preview is truncated.
+
     for key in Document.fields():
         if not hasattr(raw, key):
-            continue
-        value = getattr(raw, key)
+            if key not in raw:
+                continue
+            value = raw.get(key)
+        else:
+            value = getattr(raw, key)
 
         # We want to prevent ES-specific data types from escaping the module
         # API.
@@ -45,27 +59,32 @@ def _to_document(raw: Response, highlight: bool = True) -> Document:
         elif isinstance(value, AttrDict):
             value = value.to_dict()
 
-        if key in ['authors', 'owners']:
+        if key == 'primary_classification':
+            value = Classification(**value)
+        elif key == 'secondary_classification':
+            value = [Classification(**v) for v in value]
+        elif key in ['authors', 'owners']:
             value = [_to_author(au) for au in value]
         elif key == 'submitter':
             value = _to_author(value)
 
-        if key == 'announced_date_first' and value and isinstance(value, str):
+        elif key == 'announced_date_first' and \
+                value and isinstance(value, str):
             value = datetime.strptime(value, '%Y-%m').date()
-        if key in ['submitted_date', 'submitted_date_first',
-                   'submitted_date_latest']:
+        elif key in ['submitted_date', 'submitted_date_first',
+                     'submitted_date_latest']:
             try:
                 value = datetime.strptime(value, '%Y-%m-%dT%H:%M:%S%z')
             except (ValueError, TypeError):
-                logger.warning(
-                    f'Could not parse {key}: {value} as datetime'
-                )
+                logger.warning(f'Could not parse {key}: {value} as datetime')
                 pass
-        if key in ['acm_class', 'msc_class'] and value:
+        elif key in ['acm_class', 'msc_class'] and value:
             value = '; '.join(value)
 
         result[key] = value
-    result['score'] = raw.meta.score
+
+    if hasattr(raw, 'meta'):
+        result['score'] = raw.meta.score
     if type(result['abstract']) is str and highlight:
         result['preview']['abstract'] = preview(result['abstract'])
         if result['preview']['abstract'].endswith('&hellip;'):
@@ -75,7 +94,7 @@ def _to_document(raw: Response, highlight: bool = True) -> Document:
         result['highlight'] = {}
         logger.debug('%s: add highlighting to result', raw.paper_id)
         result = add_highlighting(result, raw)
-    
+
     return Document(**result)   # type: ignore
     # See https://github.com/python/mypy/issues/3937
 
@@ -116,6 +135,6 @@ def to_documentset(query: Query, response: Response, highlight: bool = True) \
             'size': query.size,
             'max_pages': max_pages
         },
-        'results': [_to_document(raw, highlight=highlight) for raw in response]
+        'results': [to_document(raw, highlight=highlight) for raw in response]
     })
     # See https://github.com/python/mypy/issues/3937
diff --git a/search/services/index/tests/tests.py b/search/services/index/tests/tests.py
index e671c1fa..bf13524e 100644
--- a/search/services/index/tests/tests.py
+++ b/search/services/index/tests/tests.py
@@ -25,7 +25,11 @@ def test_advanced_query(self, mock_Elasticsearch, mock_Search):
         """:class:`.index.search` supports :class:`AdvancedQuery`."""
         mock_results = mock.MagicMock()
         mock_results.__getitem__.return_value = {'total': 53}
-        mock_result = mock.MagicMock()
+        mock_result = mock.MagicMock(
+            authors=[{'full_name': 'N. Ame'}],
+            owners=[{'full_name': 'N. Ame'}],
+            submitter={'full_name': 'N. Ame'}
+        )
         mock_result.meta.score = 1
         mock_results.__iter__.return_value = [mock_result]
         mock_Search.execute.return_value = mock_results
@@ -89,7 +93,11 @@ def test_simple_query(self, mock_Elasticsearch, mock_Search):
         """:class:`.index.search` supports :class:`SimpleQuery`."""
         mock_results = mock.MagicMock()
         mock_results.__getitem__.return_value = {'total': 53}
-        mock_result = mock.MagicMock()
+        mock_result = mock.MagicMock(
+            authors=[{'full_name': 'N. Ame'}],
+            owners=[{'full_name': 'N. Ame'}],
+            submitter={'full_name': 'N. Ame'}
+        )
         mock_result.meta.score = 1
         mock_results.__iter__.return_value = [mock_result]
         mock_Search.execute.return_value = mock_results

From 49347289bd39e9aea8f992042fab15fbba9b9ef3 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Tue, 11 Sep 2018 13:15:17 -0400
Subject: [PATCH 07/77] cleanup

---
 search/routes/api/serialize.py        | 2 +-
 search/services/index/highlighting.py | 1 -
 search/services/index/tests/tests.py  | 6 +++---
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/search/routes/api/serialize.py b/search/routes/api/serialize.py
index 53043929..4528e1bc 100644
--- a/search/routes/api/serialize.py
+++ b/search/routes/api/serialize.py
@@ -150,7 +150,7 @@ def serialize(cls, document_set: DocumentSet) -> str:
             'metadata': {
                 'start': document_set.metadata.get('start'),
                 'end': document_set.metadata.get('end'),
-                'offset': document_set.metadata.get('offset'),
+                'size': document_set.metadata.get('size'),
                 'total': document_set.metadata.get('total'),
             },
         })
diff --git a/search/services/index/highlighting.py b/search/services/index/highlighting.py
index 82ec7c4b..65fa6e42 100644
--- a/search/services/index/highlighting.py
+++ b/search/services/index/highlighting.py
@@ -223,7 +223,6 @@ def add_highlighting(result: dict, raw: Response) -> dict:
 
     for field in ['abstract.tex', 'abstract.english', 'abstract']:
         if field in result['highlight']:
-            print(field)
             value = result['highlight'][field]
             abstract_snippet = preview(value)
             result['preview']['abstract'] = abstract_snippet
diff --git a/search/services/index/tests/tests.py b/search/services/index/tests/tests.py
index bf13524e..85bc1f45 100644
--- a/search/services/index/tests/tests.py
+++ b/search/services/index/tests/tests.py
@@ -52,9 +52,9 @@ def test_advanced_query(self, mock_Elasticsearch, mock_Search):
             ),
             primary_classification=ClassificationList([
                 Classification(
-                    group='physics',
-                    archive='physics',
-                    category='hep-th'
+                    group={'id': 'physics'},
+                    archive={'id': 'physics'},
+                    category={'id': 'hep-th'}
                 )
             ]),
             terms=FieldedSearchList([

From ebe82b8a05d69e0cf90daa05093cf3e0433214c9 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Tue, 11 Sep 2018 13:46:28 -0400
Subject: [PATCH 08/77] cleaned up mypy issues

---
 search/controllers/api/__init__.py | 28 +++++++++++++++-------------
 search/domain/base.py              | 11 ++++++++---
 search/routes/api/serialize.py     | 23 +++++++++++++++++------
 search/services/index/advanced.py  | 20 +++++++++++---------
 search/services/index/results.py   | 26 +++++++++++++++-----------
 5 files changed, 66 insertions(+), 42 deletions(-)

diff --git a/search/controllers/api/__init__.py b/search/controllers/api/__init__.py
index f1a6133a..a9730184 100644
--- a/search/controllers/api/__init__.py
+++ b/search/controllers/api/__init__.py
@@ -19,14 +19,14 @@
 from search.services import index, fulltext, metadata
 from search.controllers.util import paginate
 from ...domain import Query, APIQuery, FieldedSearchList, FieldedSearchTerm, \
-    DateRange, ClassificationList, Classification, asdict
+    DateRange, ClassificationList, Classification, asdict, DocumentSet, \
+    Document
 
 logger = logging.getLogger(__name__)
-Response = Tuple[Dict[str, Any], int, Dict[str, Any]]
 EASTERN = timezone('US/Eastern')
 
 
-def search(params: MultiDict) -> Response:
+def search(params: MultiDict) -> Tuple[DocumentSet, int, Dict[str, Any]]:
     """
     Handle a search request from the API.
 
@@ -56,11 +56,11 @@ def search(params: MultiDict) -> Response:
     if classifications is not None:
         q.primary_classification = classifications
 
-    q = paginate(q, params)
+    q = paginate(q, params)     # type: ignore
     return index.search(q, highlight=False), status.HTTP_200_OK, {}
 
 
-def paper(paper_id: str) -> Response:
+def paper(paper_id: str) -> Tuple[Document, int, Dict[str, Any]]:
     """
     Handle a request for paper metadata from the API.
 
@@ -97,11 +97,13 @@ def _get_fielded_terms(params: MultiDict) -> Optional[FieldedSearchList]:
     for field, _ in Query.SUPPORTED_FIELDS:
         values = params.getlist(field)
         for value in values:
-            terms.append(
-                FieldedSearchTerm(operator='AND', field=field, term=value)
-            )
+            terms.append(FieldedSearchTerm(     # type: ignore
+                operator='AND',
+                field=field,
+                term=value
+            ))
     if len(terms) == 0:
-        return
+        return None
     return terms
 
 
@@ -120,8 +122,8 @@ def _get_date_params(params: MultiDict) -> Optional[DateRange]:
             raise BadRequest({'field': field, 'reason': 'invalid datetime'})
         date_params[field] = dt
     if date_params:
-        return DateRange(**date_params)
-    return
+        return DateRange(**date_params)  # type: ignore
+    return None
 
 
 def _get_classifications(params: MultiDict) -> Optional[ClassificationList]:
@@ -133,8 +135,8 @@ def _get_classifications(params: MultiDict) -> Optional[ClassificationList]:
                 'reason': 'not a valid archive'
             })
         classifications.append(
-            Classification(archive=value)
+            Classification(archive=value)   # type: ignore
         )
     if len(classifications) == 0:
-        return
+        return None
     return classifications
diff --git a/search/domain/base.py b/search/domain/base.py
index cf74b4cd..a41f39c7 100644
--- a/search/domain/base.py
+++ b/search/domain/base.py
@@ -35,6 +35,11 @@ class Person:
     author_id: Optional[str] = field(default=None)
     """Legacy arXiv author identifier."""
 
+    @classmethod
+    def fields(cls) -> List[str]:
+        """Get the names of fields on this class."""
+        return cls.__dataclass_fields__.keys()  # type: ignore
+
 
 @dataclass
 class DocMeta:
@@ -213,9 +218,9 @@ class Document:
     id: str = field(default_factory=str)
     abstract: str = field(default_factory=str)
     abstract_tex: str = field(default_factory=str)
-    authors: List[Dict] = field(default_factory=list)
+    authors: List[Person] = field(default_factory=list)
     authors_freeform: str = field(default_factory=str)
-    owners: List[Dict] = field(default_factory=list)
+    owners: List[Person] = field(default_factory=list)
     modified_date: str = field(default_factory=str)
     updated_date: str = field(default_factory=str)
     is_current: bool = True
@@ -229,7 +234,7 @@ class Document:
     version: int = 1
     latest: str = field(default_factory=str)
     latest_version: int = 0
-    submitter: Dict = field(default_factory=dict)
+    submitter: Optional[Person] = field(default=None)
     report_num: str = field(default_factory=str)
     proxy: bool = False
     msc_class: List[str] = field(default_factory=list)
diff --git a/search/routes/api/serialize.py b/search/routes/api/serialize.py
index 4528e1bc..4bdfa3b5 100644
--- a/search/routes/api/serialize.py
+++ b/search/routes/api/serialize.py
@@ -105,14 +105,20 @@ def transform_document(cls, doc: Document) -> dict:
             'acm_class': doc.acm_class,
             'owners': [
                 cls._transform_person(owner) for owner in doc.owners
+                if owner is not None
             ],
             'authors': [
                 cls._transform_person(author) for author in doc.authors
+                if author is not None
             ],
             'comments': doc.comments,
             'submitted_date': doc.submitted_date,
             'submitted_date_first': doc.submitted_date_first,
-            'announced_date_first': doc.announced_date_first.strftime('%Y-%m'),
+            'announced_date_first': (
+                doc.announced_date_first.strftime('%Y-%m')
+                if doc.announced_date_first is not None
+                else None
+            ),
             'paper_id': doc.paper_id_v,
             'doi': doc.doi,
             'formats': [
@@ -133,7 +139,10 @@ def transform_document(cls, doc: Document) -> dict:
             ],
             'report_num': doc.report_num,
             'source': doc.source,  # TODO: link?
-            'submitter': cls._transform_person(doc.submitter),
+            'submitter': (
+                cls._transform_person(doc.submitter)
+                if doc.submitter is not None else None
+            ),
             'title': doc.title,
             'version': doc.version,
             'latest': cls._transform_latest(doc),
@@ -142,7 +151,7 @@ def transform_document(cls, doc: Document) -> dict:
     @classmethod
     def serialize(cls, document_set: DocumentSet) -> str:
         """Generate JSON for a :class:`DocumentSet`."""
-        return jsonify({
+        serialized: str = jsonify({
             'results': [
                 cls.transform_document(doc)
                 for doc in document_set.results
@@ -154,15 +163,17 @@ def serialize(cls, document_set: DocumentSet) -> str:
                 'total': document_set.metadata.get('total'),
             },
         })
+        return serialized
 
     @classmethod
     def serialize_document(cls, document: Document) -> str:
         """Generate JSON for a single :class:`Document`."""
-        return jsonify(cls.transform_document(document))
+        serialized: str = jsonify(cls.transform_document(document))
+        return serialized
 
 
 def as_json(document_or_document_set: Union[DocumentSet, Document]) -> str:
     """Serialize a :class:`DocumentSet` as JSON."""
     if type(document_or_document_set) is DocumentSet:
-        return JSONSerializer.serialize(document_or_document_set)
-    return JSONSerializer.serialize_document(document_or_document_set)
+        return JSONSerializer.serialize(document_or_document_set)  # type: ignore
+    return JSONSerializer.serialize_document(document_or_document_set)  # type: ignore
diff --git a/search/services/index/advanced.py b/search/services/index/advanced.py
index f43cc03c..bc4e2804 100644
--- a/search/services/index/advanced.py
+++ b/search/services/index/advanced.py
@@ -1,17 +1,18 @@
 """Supports the advanced search feature."""
 
-from typing import Any
+from typing import Any, Union
 
 from elasticsearch_dsl import Search, Q, SF
 from elasticsearch_dsl.query import Range, Match, Bool
 
-from search.domain import AdvancedQuery, Classification
+from search.domain import AdvancedQuery, Classification, APIQuery
 
 from .prepare import SEARCH_FIELDS
 from .util import sort
 
 
-def advanced_search(search: Search, query: AdvancedQuery) -> Search:
+def advanced_search(search: Search,
+                    query: Union[AdvancedQuery, APIQuery]) -> Search:
     """
     Prepare a :class:`.Search` from a :class:`.AdvancedQuery`.
 
@@ -19,8 +20,9 @@ def advanced_search(search: Search, query: AdvancedQuery) -> Search:
     ----------
     search : :class:`.Search`
         An Elasticsearch search in preparation.
-    query : :class:`.AdvancedQuery`
-        An advanced query, originating from the advanced search controller.
+    query : :class:`.AdvancedQuery` or :class:`APIQuery`
+        An advanced query, originating from the advanced search controller or
+        the API, respectively.
 
     Returns
     -------
@@ -62,7 +64,7 @@ def _classification(field: str, classification: Classification) -> Match:
     return query
 
 
-def _classifications(q: AdvancedQuery) -> Match:
+def _classifications(q: Union[AdvancedQuery, APIQuery]) -> Match:
     """Get a query part for classifications on an :class:`.AdvancedQuery`."""
     if not q.primary_classification:
         return Q()
@@ -74,7 +76,7 @@ def _classifications(q: AdvancedQuery) -> Match:
     return query
 
 
-def _date_range(q: AdvancedQuery) -> Range:
+def _date_range(q: Union[AdvancedQuery, APIQuery]) -> Range:
     """Generate a query part for a date range."""
     if not q.date_range:
         return Q()
@@ -121,7 +123,7 @@ def _get_operator(obj: Any) -> str:
     return obj.operator     # type: ignore
 
 
-def _group_terms(query: AdvancedQuery) -> tuple:
+def _group_terms(query: Union[AdvancedQuery, APIQuery]) -> tuple:
     """Group fielded search terms into a set of nested tuples."""
     terms = query.terms[:]
     for operator in ['NOT', 'AND', 'OR']:
@@ -136,7 +138,7 @@ def _group_terms(query: AdvancedQuery) -> tuple:
     return terms[0]     # type: ignore
 
 
-def _fielded_terms_to_q(query: AdvancedQuery) -> Match:
+def _fielded_terms_to_q(query: Union[AdvancedQuery, APIQuery]) -> Match:
     if len(query.terms) == 1:
         return SEARCH_FIELDS[query.terms[0].field](query.terms[0].term)
     elif len(query.terms) > 1:
diff --git a/search/services/index/results.py b/search/services/index/results.py
index a67c9215..f23dc04d 100644
--- a/search/services/index/results.py
+++ b/search/services/index/results.py
@@ -29,10 +29,10 @@ def _to_author(author_data: dict) -> Person:
             continue
         elif key == 'name':
             key = 'full_name'
-        if key not in Person.__dataclass_fields__:
+        if key not in Person.fields():
             continue
         data[key] = value
-    return Person(**data)
+    return Person(**data)   # type: ignore
 
 
 def to_document(raw: Union[Response, dict], highlight: bool = True) \
@@ -45,12 +45,15 @@ def to_document(raw: Union[Response, dict], highlight: bool = True) \
     result['truncated'] = {}    # Preview is truncated.
 
     for key in Document.fields():
-        if not hasattr(raw, key):
+        if type(raw) is Response:
+            if not hasattr(raw, key):
+                continue
+            value = getattr(raw, key)
+
+        elif type(raw) is dict:
             if key not in raw:
                 continue
             value = raw.get(key)
-        else:
-            value = getattr(raw, key)
 
         # We want to prevent ES-specific data types from escaping the module
         # API.
@@ -60,9 +63,9 @@ def to_document(raw: Union[Response, dict], highlight: bool = True) \
             value = value.to_dict()
 
         if key == 'primary_classification':
-            value = Classification(**value)
+            value = Classification(**value)  # type: ignore
         elif key == 'secondary_classification':
-            value = [Classification(**v) for v in value]
+            value = [Classification(**v) for v in value]  # type: ignore
         elif key in ['authors', 'owners']:
             value = [_to_author(au) for au in value]
         elif key == 'submitter':
@@ -83,16 +86,17 @@ def to_document(raw: Union[Response, dict], highlight: bool = True) \
 
         result[key] = value
 
-    if hasattr(raw, 'meta'):
-        result['score'] = raw.meta.score
+    if type(raw) is Response:
+        result['score'] = raw.meta.score    # type: ignore
     if type(result['abstract']) is str and highlight:
         result['preview']['abstract'] = preview(result['abstract'])
         if result['preview']['abstract'].endswith('&hellip;'):
             result['truncated']['abstract'] = True
 
-    if highlight:
+    if highlight and type(raw) is Response:
         result['highlight'] = {}
-        logger.debug('%s: add highlighting to result', raw.paper_id)
+        logger.debug('%s: add highlighting to result',
+                     raw.paper_id)  # type: ignore
         result = add_highlighting(result, raw)
 
     return Document(**result)   # type: ignore

From 743329b51ec944556b0ff58ab43484e2bba04c7f Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Tue, 11 Sep 2018 13:47:31 -0400
Subject: [PATCH 09/77] fixed docstyle issues

---
 search/routes/api/tests/__init__.py       | 1 +
 search/routes/api/tests/test_serialize.py | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/search/routes/api/tests/__init__.py b/search/routes/api/tests/__init__.py
index e69de29b..1b3faf02 100644
--- a/search/routes/api/tests/__init__.py
+++ b/search/routes/api/tests/__init__.py
@@ -0,0 +1 @@
+"""Tests for API routes."""
diff --git a/search/routes/api/tests/test_serialize.py b/search/routes/api/tests/test_serialize.py
index 39af719b..cca56750 100644
--- a/search/routes/api/tests/test_serialize.py
+++ b/search/routes/api/tests/test_serialize.py
@@ -1,3 +1,5 @@
+"""Tests for serializers."""
+
 from unittest import TestCase
 
 from ....domain import DocumentSet, Document

From c9e254cb798e305bc2cf35d35fd6badc52f8e0f5 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Tue, 11 Sep 2018 13:52:28 -0400
Subject: [PATCH 10/77] fixed failing test

---
 search/routes/api/serialize.py   | 70 +++++++++++++++-----------------
 search/services/index/results.py |  6 ++-
 2 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/search/routes/api/serialize.py b/search/routes/api/serialize.py
index 4bdfa3b5..a6a96060 100644
--- a/search/routes/api/serialize.py
+++ b/search/routes/api/serialize.py
@@ -11,43 +11,6 @@
 class BaseSerializer(object):
     """Base class for API serializers."""
 
-    # def __init__(cls, document_set: DocumentSet) -> None:
-    #     """Initialize with a :class:`DocumentSet`."""
-        # cls._document_set = document_set
-
-
-# class AtomXMLSerializer(BaseSerializer):
-#     """."""
-#
-#     ATOM = "http://www.w3.org/2005/Atom"
-#     OPENSEARCH = "http://a9.com/-/spec/opensearch/1.1/"
-#     ARXIV = "http://arxiv.org/schemas/atom"
-#     NSMAP = {
-#         None: ATOM,
-#         "opensearch": OPENSEARCH,
-#         "arxiv": ARXIV
-#     }
-#     fields = {
-#         'title': '{%s}title' % ATOM,
-#         'id': '{%s}id' % ATOM,
-#         'submitted_date': '{%s}published' % ATOM,
-#         'modified_date': '{%s}updated' % ATOM,
-#         'abstract': '{%s}summary' % ATOM,
-#         ''
-#     }
-#
-#     def __init__(cls, *args, **kwargs) -> None:
-#         super(AtomXMLSerializer, cls).__init__(*args, **kwargs)
-#         cls._root = etree.Element('feed', nsmap=cls.NSMAP)
-#
-#     def transform(cls):
-#         for document in cls.iter_documents():
-#
-#
-#
-#     def __repr__(cls) -> str:
-#         return etree.tostring(cls._root, pretty_print=True)
-
 
 class JSONSerializer(BaseSerializer):
     """Serializes a :class:`DocumentSet` as JSON."""
@@ -177,3 +140,36 @@ def as_json(document_or_document_set: Union[DocumentSet, Document]) -> str:
     if type(document_or_document_set) is DocumentSet:
         return JSONSerializer.serialize(document_or_document_set)  # type: ignore
     return JSONSerializer.serialize_document(document_or_document_set)  # type: ignore
+
+
+# class AtomXMLSerializer(BaseSerializer):
+#     """."""
+#
+#     ATOM = "http://www.w3.org/2005/Atom"
+#     OPENSEARCH = "http://a9.com/-/spec/opensearch/1.1/"
+#     ARXIV = "http://arxiv.org/schemas/atom"
+#     NSMAP = {
+#         None: ATOM,
+#         "opensearch": OPENSEARCH,
+#         "arxiv": ARXIV
+#     }
+#     fields = {
+#         'title': '{%s}title' % ATOM,
+#         'id': '{%s}id' % ATOM,
+#         'submitted_date': '{%s}published' % ATOM,
+#         'modified_date': '{%s}updated' % ATOM,
+#         'abstract': '{%s}summary' % ATOM,
+#         ''
+#     }
+#
+#     def __init__(cls, *args, **kwargs) -> None:
+#         super(AtomXMLSerializer, cls).__init__(*args, **kwargs)
+#         cls._root = etree.Element('feed', nsmap=cls.NSMAP)
+#
+#     def transform(cls):
+#         for document in cls.iter_documents():
+#
+#
+#
+#     def __repr__(cls) -> str:
+#         return etree.tostring(cls._root, pretty_print=True)
diff --git a/search/services/index/results.py b/search/services/index/results.py
index f23dc04d..ec6036cf 100644
--- a/search/services/index/results.py
+++ b/search/services/index/results.py
@@ -54,6 +54,8 @@ def to_document(raw: Union[Response, dict], highlight: bool = True) \
             if key not in raw:
                 continue
             value = raw.get(key)
+        else:
+            continue
 
         # We want to prevent ES-specific data types from escaping the module
         # API.
@@ -88,7 +90,9 @@ def to_document(raw: Union[Response, dict], highlight: bool = True) \
 
     if type(raw) is Response:
         result['score'] = raw.meta.score    # type: ignore
-    if type(result['abstract']) is str and highlight:
+    if type(result.get('abstract')) is str and highlight:
+        if 'preview' not in result:
+            result['preview'] = {}
         result['preview']['abstract'] = preview(result['abstract'])
         if result['preview']['abstract'].endswith('&hellip;'):
             result['truncated']['abstract'] = True

From b67b0618674fdd129b7afb3779573e8f07cdf001 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Tue, 11 Sep 2018 14:22:31 -0400
Subject: [PATCH 11/77] ARXIVNG-1177 added tests using JSON schema

---
 schema/resources/Document.json            |  11 +-
 schema/resources/DocumentSet.json         |   7 +-
 search/config.py                          |   2 +
 search/factory.py                         |   1 -
 search/routes/api/tests/test_serialize.py | 160 +++++++++++++++++++++-
 5 files changed, 170 insertions(+), 11 deletions(-)

diff --git a/schema/resources/Document.json b/schema/resources/Document.json
index d67f672d..3120a19c 100644
--- a/schema/resources/Document.json
+++ b/schema/resources/Document.json
@@ -68,13 +68,13 @@
     "owners": {
       "items": {
         "type": "object",
-        "$ref": "Person.json#Person"
+        "$ref": "./Person.json"
       }
     },
     "authors": {
       "items": {
         "type": "object",
-        "$ref": "Person.json#Person"
+        "$ref": "./Person.json"
       },
       "minItems": 1,
       "type": "array"
@@ -147,11 +147,11 @@
       "uniqueItems": true
     },
     "primary_classification": {
-      "$ref": "#/definitions/classification"
+      "$ref": "./Document.json#/definitions/classification"
     },
     "secondary_classification": {
       "type": "array",
-      "items": {"$ref": "#/definitions/classification"}
+      "items": {"$ref": "./Document.json#/definitions/classification"}
     },
     "report_num": {
       "type": "string"
@@ -174,7 +174,7 @@
     "submitter": {
       "description": "Submitter data. Name may not match those associated with user account, since user data is copied to submission data at the time of submission creation.",
       "type": "object",
-      "$ref": "Person.json#Person"
+      "$ref": "./Person.json"
     },
     "title": {
       "type": "string"
@@ -212,7 +212,6 @@
     "is_withdrawn",
     "license",
     "paper_id",
-    "paper_id_v",
     "primary_classification",
     "title",
     "source",
diff --git a/schema/resources/DocumentSet.json b/schema/resources/DocumentSet.json
index 9c894a6d..1b53b35d 100644
--- a/schema/resources/DocumentSet.json
+++ b/schema/resources/DocumentSet.json
@@ -23,8 +23,11 @@
       }
     },
     "results": {
-      "type": "object",
-      "$ref": "Document.json#Document"
+      "type": "array",
+      "items": {
+        "type": "object",
+        "$ref": "Document.json"
+      }
     }
   }
 }
diff --git a/search/config.py b/search/config.py
index d223026e..3cde90c1 100644
--- a/search/config.py
+++ b/search/config.py
@@ -9,6 +9,8 @@
 ON = 'yes'
 OFF = 'no'
 
+SERVER_NAME = os.environ.get('SERVER_NAME', 'localhost')
+
 DEBUG = os.environ.get('DEBUG') == ON
 """enable/disable debug mode"""
 
diff --git a/search/factory.py b/search/factory.py
index b4bcda9e..cb427418 100644
--- a/search/factory.py
+++ b/search/factory.py
@@ -47,7 +47,6 @@ def create_api_web_app() -> Flask:
     app.json_encoder = ISO8601JSONEncoder
     app.config.from_pyfile('config.py')
 
-
     index.init_app(app)
 
     Base(app)
diff --git a/search/routes/api/tests/test_serialize.py b/search/routes/api/tests/test_serialize.py
index cca56750..8d3ed542 100644
--- a/search/routes/api/tests/test_serialize.py
+++ b/search/routes/api/tests/test_serialize.py
@@ -1,5 +1,161 @@
 """Tests for serializers."""
 
-from unittest import TestCase
+import os
+from unittest import TestCase, mock
+from datetime import datetime
+import json
+import jsonschema
+from .... import domain, encode
+from .. import serialize
 
-from ....domain import DocumentSet, Document
+
+def mock_jsonify(o):
+    return json.dumps(o, cls=encode.ISO8601JSONEncoder)
+
+
+class TestSerializeJSONDocument(TestCase):
+    """Serialize a single :class:`domain.Document` as JSON."""
+
+    SCHEMA_PATH = os.path.abspath('schema/resources/Document.json')
+
+    def setUp(self):
+        with open(self.SCHEMA_PATH) as f:
+            self.schema = json.load(f)
+
+    @mock.patch(f'{serialize.__name__}.url_for', lambda *a, **k: 'http://f/12')
+    @mock.patch(f'{serialize.__name__}.jsonify', mock_jsonify)
+    def test_to_json(self):
+        """Just your run-of-the-mill arXiv document generates valid JSON."""
+        document = domain.Document(
+            submitted_date=datetime.now(),
+            submitted_date_first=datetime.now(),
+            announced_date_first=datetime.now(),
+            id='1234.5678',
+            abstract='very abstract',
+            authors=[
+                domain.Person(full_name='F. Bar', orcid='1234-5678-9012-3456')
+            ],
+            submitter=domain.Person(full_name='S. Ubmitter', author_id='su_1'),
+            modified_date=datetime.now(),
+            updated_date=datetime.now(),
+            is_current=True,
+            is_withdrawn=False,
+            license={
+                'uri': 'http://foo.license/1',
+                'label': 'Notalicense 5.4'
+            },
+            paper_id='1234.5678',
+            paper_id_v='1234.5678v6',
+            title='tiiiitle',
+            source={
+                'flags': 'A',
+                'format': 'pdftotex',
+                'size_bytes': 2
+            },
+            version=6,
+            latest='1234.5678v6',
+            latest_version=6,
+            report_num='somenum1',
+            msc_class=['c1'],
+            acm_class=['z2'],
+            journal_ref='somejournal (1991): 2-34',
+            doi='10.123456/7890',
+            comments='very science',
+            abs_categories='astro-ph.CO foo.BR',
+            formats=['pdf', 'other'],
+            primary_classification=domain.Classification(
+                group={'id': 'foo', 'name': 'Foo Group'},
+                archive={'id': 'foo', 'name': 'Foo Archive'},
+                category={'id': 'foo.BR', 'name': 'Foo Category'},
+            ),
+            secondary_classification=[
+                domain.Classification(
+                    group={'id': 'foo', 'name': 'Foo Group'},
+                    archive={'id': 'foo', 'name': 'Foo Archive'},
+                    category={'id': 'foo.BZ', 'name': 'Baz Category'},
+                )
+            ]
+        )
+        srlzd = serialize.as_json(document)
+        res = jsonschema.RefResolver(
+            'file://%s/' % os.path.abspath(os.path.dirname(self.SCHEMA_PATH)),
+            None
+        )
+        self.assertIsNone(
+            jsonschema.validate(json.loads(srlzd), self.schema, resolver=res)
+        )
+
+
+class TestSerializeJSONDocumentSet(TestCase):
+    """Serialize a :class:`domain.DocumentSet` as JSON."""
+
+    SCHEMA_PATH = os.path.abspath('schema/resources/DocumentSet.json')
+
+    def setUp(self):
+        with open(self.SCHEMA_PATH) as f:
+            self.schema = json.load(f)
+
+    @mock.patch(f'{serialize.__name__}.url_for', lambda *a, **k: 'http://f/12')
+    @mock.patch(f'{serialize.__name__}.jsonify', mock_jsonify)
+    def test_to_json(self):
+        """A run-of-the-mill arXiv document set generates valid JSON."""
+        document = domain.Document(
+            submitted_date=datetime.now(),
+            submitted_date_first=datetime.now(),
+            announced_date_first=datetime.now(),
+            id='1234.5678',
+            abstract='very abstract',
+            authors=[
+                domain.Person(full_name='F. Bar', orcid='1234-5678-9012-3456')
+            ],
+            submitter=domain.Person(full_name='S. Ubmitter', author_id='su_1'),
+            modified_date=datetime.now(),
+            updated_date=datetime.now(),
+            is_current=True,
+            is_withdrawn=False,
+            license={
+                'uri': 'http://foo.license/1',
+                'label': 'Notalicense 5.4'
+            },
+            paper_id='1234.5678',
+            paper_id_v='1234.5678v6',
+            title='tiiiitle',
+            source={
+                'flags': 'A',
+                'format': 'pdftotex',
+                'size_bytes': 2
+            },
+            version=6,
+            latest='1234.5678v6',
+            latest_version=6,
+            report_num='somenum1',
+            msc_class=['c1'],
+            acm_class=['z2'],
+            journal_ref='somejournal (1991): 2-34',
+            doi='10.123456/7890',
+            comments='very science',
+            abs_categories='astro-ph.CO foo.BR',
+            formats=['pdf', 'other'],
+            primary_classification=domain.Classification(
+                group={'id': 'foo', 'name': 'Foo Group'},
+                archive={'id': 'foo', 'name': 'Foo Archive'},
+                category={'id': 'foo.BR', 'name': 'Foo Category'},
+            ),
+            secondary_classification=[
+                domain.Classification(
+                    group={'id': 'foo', 'name': 'Foo Group'},
+                    archive={'id': 'foo', 'name': 'Foo Archive'},
+                    category={'id': 'foo.BZ', 'name': 'Baz Category'},
+                )
+            ]
+        )
+        meta = {'start': 0, 'size': 50, 'end': 50, 'total': 500202}
+        document_set = domain.DocumentSet(results=[document], metadata=meta)
+        srlzd = serialize.as_json(document_set)
+        res = jsonschema.RefResolver(
+            'file://%s/' % os.path.abspath(os.path.dirname(self.SCHEMA_PATH)),
+            None
+        )
+        self.assertIsNone(
+            jsonschema.validate(json.loads(srlzd), self.schema, resolver=res)
+        )

From 49bb04727abf9b6b9280caba97c52729e9efe5f0 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Tue, 11 Sep 2018 15:08:04 -0400
Subject: [PATCH 12/77] minor: remove SERVER_NAME setting from search config

---
 search/config.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/search/config.py b/search/config.py
index 3cde90c1..d223026e 100644
--- a/search/config.py
+++ b/search/config.py
@@ -9,8 +9,6 @@
 ON = 'yes'
 OFF = 'no'
 
-SERVER_NAME = os.environ.get('SERVER_NAME', 'localhost')
-
 DEBUG = os.environ.get('DEBUG') == ON
 """enable/disable debug mode"""
 

From 6e3315b1a5590b33baf4b3a2ff8d43d4e28b14dd Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Wed, 12 Sep 2018 11:10:41 -0400
Subject: [PATCH 13/77] authn/z and exception handling for API endpoints

---
 Pipfile                            |   1 +
 Pipfile.lock                       |  49 +++++++++++++-
 search/config.py                   |   1 +
 search/controllers/api/__init__.py |   4 +-
 search/factory.py                  |   9 ++-
 search/routes/api/__init__.py      |  10 +--
 search/routes/api/exceptions.py    | 101 +++++++++++++++++++++++++++++
 search/routes/api/serialize.py     |   8 +--
 search/services/index/results.py   |   8 +--
 9 files changed, 176 insertions(+), 15 deletions(-)
 create mode 100644 search/routes/api/exceptions.py

diff --git a/Pipfile b/Pipfile
index 84d8a0c2..45d2e0e2 100644
--- a/Pipfile
+++ b/Pipfile
@@ -5,6 +5,7 @@ name = "pypi"
 
 [packages]
 arxiv-base = "==0.11.1rc5"
+arxiv-auth = "==0.1.0rc14"
 boto = "==2.48.0"
 "boto3" = "==1.6.6"
 botocore = "==1.9.6"
diff --git a/Pipfile.lock b/Pipfile.lock
index 29767b1e..162ea5be 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "8f191c5bd0276ba424857b8ccd030e33606f6faa4ae022d84dbcdd1eacb1692e"
+            "sha256": "b56d17953fa137da21598148d166019ec6ddb84cd8398580351e01275c6a94c7"
         },
         "pipfile-spec": 6,
         "requires": {},
@@ -14,6 +14,13 @@
         ]
     },
     "default": {
+        "arxiv-auth": {
+            "hashes": [
+                "sha256:6a10165a775bdfca281a8ffac5aefe737e02b92f62c6ee95c5d793bae30dd457"
+            ],
+            "index": "pypi",
+            "version": "==0.1.0rc14"
+        },
         "arxiv-base": {
             "hashes": [
                 "sha256:e8f8504a4b8ca5a190a3572882cd1cfd544b071cc2deec1e62d90635787da66f"
@@ -293,6 +300,12 @@
             "index": "pypi",
             "version": "==0.560"
         },
+        "mysqlclient": {
+            "hashes": [
+                "sha256:ff8ee1be84215e6c30a746b728c41eb0701a46ca76e343af445b35ce6250644f"
+            ],
+            "version": "==1.3.13"
+        },
         "nose2": {
             "hashes": [
                 "sha256:5c79a2e46ad76999ca1ec7a83080424ed134eafaaf90b6e7554ebdf85aea6f23"
@@ -331,6 +344,14 @@
             "index": "pypi",
             "version": "==2.3.1"
         },
+        "pycountry": {
+            "hashes": [
+                "sha256:195d8174fd6f98f45622d2885826091fc8b2168184422dac574311073ab8386c",
+                "sha256:7f2aa2529c60f6575af3cd644688b201b97016822ce0ce1c4bcc0f7d08900997",
+                "sha256:ea38f839e719be54dfae1b089cbf1917add6ff8f43a55533d8c7c5799656a848"
+            ],
+            "version": "==18.5.26"
+        },
         "pydocstyle": {
             "hashes": [
                 "sha256:08a870edc94508264ed90510db466c6357c7192e0e866561d740624a8fc7d90c",
@@ -348,6 +369,13 @@
             "index": "pypi",
             "version": "==1.6.0"
         },
+        "pyjwt": {
+            "hashes": [
+                "sha256:30b1380ff43b55441283cc2b2676b755cca45693ae3097325dea01f3d110628c",
+                "sha256:4ee413b357d53fd3fb44704577afac88e72e878716116270d722723d65b42176"
+            ],
+            "version": "==1.6.4"
+        },
         "pylama": {
             "hashes": [
                 "sha256:390c1dab1daebdf3d6acc923e551b035c3faa77d8b96b98530c230493f9ec712",
@@ -379,6 +407,19 @@
             "index": "pypi",
             "version": "==2017.3"
         },
+        "redis": {
+            "hashes": [
+                "sha256:8a1900a9f2a0a44ecf6e8b5eb3e967a9909dfed219ad66df094f27f7d6f330fb",
+                "sha256:a22ca993cea2962dbb588f9f30d0015ac4afcc45bee27d3978c0dbe9e97c6c0f"
+            ],
+            "version": "==2.10.6"
+        },
+        "redis-py-cluster": {
+            "hashes": [
+                "sha256:e7349b1f2487d2c763088d81162e4a2ed91adee975c296bdba2ed96fd7450b36"
+            ],
+            "version": "==1.3.5"
+        },
         "requests": {
             "hashes": [
                 "sha256:6a1b267aa90cac58ac3a765d067950e7dbbf75b1da07e895d1f594193a40a38b",
@@ -410,6 +451,12 @@
             "index": "pypi",
             "version": "==1.2.1"
         },
+        "sqlalchemy": {
+            "hashes": [
+                "sha256:ef6569ad403520ee13e180e1bfd6ed71a0254192a934ec1dbd3dbf48f4aa9524"
+            ],
+            "version": "==1.2.11"
+        },
         "thrift": {
             "hashes": [
                 "sha256:7d59ac4fdcb2c58037ebd4a9da5f9a49e3e034bf75b3f26d9fe48ba3d8806e6b"
diff --git a/search/config.py b/search/config.py
index d223026e..6d306189 100644
--- a/search/config.py
+++ b/search/config.py
@@ -241,6 +241,7 @@
     ("other", "/format/<arxiv:paper_id>v<string:version>", BASE_SERVER),
 ]
 
+JWT_SECRET = os.environ.get('JWT_SECRET', 'foosecret')
 
 # TODO: one place to set the version, update release notes text, JIRA issue
 # collector, etc.
diff --git a/search/controllers/api/__init__.py b/search/controllers/api/__init__.py
index a9730184..768839e6 100644
--- a/search/controllers/api/__init__.py
+++ b/search/controllers/api/__init__.py
@@ -57,7 +57,9 @@ def search(params: MultiDict) -> Tuple[DocumentSet, int, Dict[str, Any]]:
         q.primary_classification = classifications
 
     q = paginate(q, params)     # type: ignore
-    return index.search(q, highlight=False), status.HTTP_200_OK, {}
+    document_set = index.search(q, highlight=False)
+    logger.debug('Got document set with %i results', len(document_set.results))
+    return document_set, status.HTTP_200_OK, {}
 
 
 def paper(paper_id: str) -> Tuple[Document, int, Dict[str, Any]]:
diff --git a/search/factory.py b/search/factory.py
index cb427418..da1a6231 100644
--- a/search/factory.py
+++ b/search/factory.py
@@ -12,6 +12,8 @@
 from search.converters import ArchiveConverter
 from search.encode import ISO8601JSONEncoder
 
+from arxiv.users import auth
+
 s3 = FlaskS3()
 
 
@@ -50,8 +52,13 @@ def create_api_web_app() -> Flask:
     index.init_app(app)
 
     Base(app)
+    auth.Auth(app)
     app.register_blueprint(api.blueprint)
 
-    wrap(app, [request_logs.ClassicLogsMiddleware])
+    wrap(app, [request_logs.ClassicLogsMiddleware,
+               auth.middleware.AuthMiddleware])
+
+    for error, handler in api.exceptions.get_handlers():
+            app.errorhandler(error)(handler)
 
     return app
diff --git a/search/routes/api/__init__.py b/search/routes/api/__init__.py
index 8cfdd3ec..c417bd03 100644
--- a/search/routes/api/__init__.py
+++ b/search/routes/api/__init__.py
@@ -16,10 +16,10 @@
 from werkzeug.exceptions import InternalServerError
 from search.controllers import api
 
-from . import serialize
+from . import serialize, exceptions
 
-# from arxiv.users.auth.decorators import scoped
-# from arxiv.users.auth import scopes
+from arxiv.users.auth.decorators import scoped
+from arxiv.users.auth import scopes
 
 logger = logging.getLogger(__name__)
 
@@ -29,10 +29,11 @@
 JSON = "application/json"
 
 
-# @scoped(scopes.READ_API)
 @blueprint.route('/', methods=['GET'])
+@scoped(required=scopes.READ_PUBLIC)
 def search() -> Response:
     """Main query endpoint."""
+    logger.debug('Got query: %s', request.args)
     data, status_code, headers = api.search(request.args)
     # requested = request.accept_mimetypes.best_match([JSON, ATOM_XML])
     # if requested == ATOM_XML:
@@ -41,6 +42,7 @@ def search() -> Response:
 
 
 @blueprint.route('<arxiv:paper_id>v<string:version>', methods=['GET'])
+@scoped(required=scopes.READ_PUBLIC)
 def paper(paper_id: str, version: str) -> Response:
     """Document metadata endpoint."""
     data, status_code, headers = api.paper(f'{paper_id}v{version}')
diff --git a/search/routes/api/exceptions.py b/search/routes/api/exceptions.py
new file mode 100644
index 00000000..3a18b5ae
--- /dev/null
+++ b/search/routes/api/exceptions.py
@@ -0,0 +1,101 @@
+"""
+Exception handlers for API endpoints.
+
+.. todo:: This module belongs in :mod:`arxiv.base`.
+
+"""
+
+from typing import Callable, List, Tuple
+
+from werkzeug.exceptions import NotFound, Forbidden, Unauthorized, \
+    MethodNotAllowed, RequestEntityTooLarge, BadRequest, InternalServerError, \
+    HTTPException
+from flask import make_response, Response, jsonify
+
+from arxiv import status
+
+_handlers = []
+
+
+def handler(exception: type) -> Callable:
+    """Generate a decorator to register a handler for an exception."""
+    def deco(func: Callable) -> Callable:
+        """Register a function as an exception handler."""
+        _handlers.append((exception, func))
+        return func
+    return deco
+
+
+def get_handlers() -> List[Tuple[type, Callable]]:
+    """
+    Get a list of registered exception handlers.
+
+    Returns
+    -------
+    list
+        List of (:class:`.HTTPException`, callable) tuples.
+    """
+    return _handlers
+
+
+@handler(NotFound)
+def handle_not_found(error: NotFound) -> Response:
+    """Render the base 404 error page."""
+    rendered = jsonify({'code': error.code, 'error': error.description})
+    response = make_response(rendered)
+    response.status_code = status.HTTP_404_NOT_FOUND
+    return response
+
+
+@handler(Forbidden)
+def handle_forbidden(error: Forbidden) -> Response:
+    """Render the base 403 error page."""
+    rendered = jsonify({'code': error.code, 'error': error.description})
+    response = make_response(rendered)
+    response.status_code = status.HTTP_403_FORBIDDEN
+    return response
+
+
+@handler(Unauthorized)
+def handle_unauthorized(error: Unauthorized) -> Response:
+    """Render the base 401 error page."""
+    rendered = jsonify({'code': error.code, 'error': error.description})
+    response = make_response(rendered)
+    response.status_code = status.HTTP_401_UNAUTHORIZED
+    return response
+
+
+@handler(MethodNotAllowed)
+def handle_method_not_allowed(error: MethodNotAllowed) -> Response:
+    """Render the base 405 error page."""
+    rendered = jsonify({'code': error.code, 'error': error.description})
+    response = make_response(rendered)
+    response.status_code = status.HTTP_405_METHOD_NOT_ALLOWED
+    return response
+
+
+@handler(RequestEntityTooLarge)
+def handle_request_entity_too_large(error: RequestEntityTooLarge) -> Response:
+    """Render the base 413 error page."""
+    rendered = jsonify({'code': error.code, 'error': error.description})
+    response = make_response(rendered)
+    response.status_code = status.HTTP_413_REQUEST_ENTITY_TOO_LARGE
+    return response
+
+
+@handler(BadRequest)
+def handle_bad_request(error: BadRequest) -> Response:
+    """Render the base 400 error page."""
+    rendered = jsonify({'code': error.code, 'error': error.description})
+    response = make_response(rendered)
+    response.status_code = status.HTTP_400_BAD_REQUEST
+    return response
+
+
+@handler(InternalServerError)
+def handle_internal_server_error(error: InternalServerError) -> Response:
+    """Render the base 500 error page."""
+    rendered = jsonify({'code': error.code, 'error': error.description})
+    response = make_response(rendered)
+    response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
+    return response
diff --git a/search/routes/api/serialize.py b/search/routes/api/serialize.py
index a6a96060..70e325c6 100644
--- a/search/routes/api/serialize.py
+++ b/search/routes/api/serialize.py
@@ -135,11 +135,11 @@ def serialize_document(cls, document: Document) -> str:
         return serialized
 
 
-def as_json(document_or_document_set: Union[DocumentSet, Document]) -> str:
+def as_json(document_or_set: Union[DocumentSet, Document]) -> str:
     """Serialize a :class:`DocumentSet` as JSON."""
-    if type(document_or_document_set) is DocumentSet:
-        return JSONSerializer.serialize(document_or_document_set)  # type: ignore
-    return JSONSerializer.serialize_document(document_or_document_set)  # type: ignore
+    if type(document_or_set) is DocumentSet:
+        return JSONSerializer.serialize(document_or_set)  # type: ignore
+    return JSONSerializer.serialize_document(document_or_set)  # type: ignore
 
 
 # class AtomXMLSerializer(BaseSerializer):
diff --git a/search/services/index/results.py b/search/services/index/results.py
index ec6036cf..d6cabc2a 100644
--- a/search/services/index/results.py
+++ b/search/services/index/results.py
@@ -9,7 +9,7 @@
 from math import floor
 from typing import Any, Dict, Union
 
-from elasticsearch_dsl.response import Response
+from elasticsearch_dsl.response import Response, Hit
 from elasticsearch_dsl.utils import AttrList, AttrDict
 from search.domain import Document, Query, DocumentSet, Classification, Person
 from arxiv.base import logging
@@ -35,8 +35,7 @@ def _to_author(author_data: dict) -> Person:
     return Person(**data)   # type: ignore
 
 
-def to_document(raw: Union[Response, dict], highlight: bool = True) \
-        -> Document:
+def to_document(raw: Union[Hit, dict], highlight: bool = True) -> Document:
     """Transform an ES search result back into a :class:`.Document`."""
     # typing: ignore
     result: Dict[str, Any] = {}
@@ -44,8 +43,9 @@ def to_document(raw: Union[Response, dict], highlight: bool = True) \
     result['match'] = {}  # Hit on field, but no highlighting.
     result['truncated'] = {}    # Preview is truncated.
 
+    logger.debug('Raw data is a %s instance', type(raw))
     for key in Document.fields():
-        if type(raw) is Response:
+        if type(raw) is Hit:
             if not hasattr(raw, key):
                 continue
             value = getattr(raw, key)

From ea453380b976a1efc7840520f011a6034c63c290 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Wed, 12 Sep 2018 11:40:48 -0400
Subject: [PATCH 14/77] ARXIVNG-1177 added auth, more tests

---
 search/routes/api/exceptions.py     |  10 ++-
 search/routes/api/tests/test_api.py | 115 ++++++++++++++++++++++++++++
 2 files changed, 124 insertions(+), 1 deletion(-)
 create mode 100644 search/routes/api/tests/test_api.py

diff --git a/search/routes/api/exceptions.py b/search/routes/api/exceptions.py
index 3a18b5ae..bdf9db84 100644
--- a/search/routes/api/exceptions.py
+++ b/search/routes/api/exceptions.py
@@ -13,6 +13,9 @@
 from flask import make_response, Response, jsonify
 
 from arxiv import status
+from arxiv.base import logging
+
+logger = logging.getLogger(__name__)
 
 _handlers = []
 
@@ -95,7 +98,12 @@ def handle_bad_request(error: BadRequest) -> Response:
 @handler(InternalServerError)
 def handle_internal_server_error(error: InternalServerError) -> Response:
     """Render the base 500 error page."""
-    rendered = jsonify({'code': error.code, 'error': error.description})
+    if isinstance(error, HTTPException):
+        rendered = jsonify({'code': error.code, 'error': error.description})
+    else:
+        logger.error('Caught unhandled exception: %s', error)
+        rendered = jsonify({'code': status.HTTP_500_INTERNAL_SERVER_ERROR,
+                            'error': 'Unexpected error'})
     response = make_response(rendered)
     response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
     return response
diff --git a/search/routes/api/tests/test_api.py b/search/routes/api/tests/test_api.py
new file mode 100644
index 00000000..1d5b1f9c
--- /dev/null
+++ b/search/routes/api/tests/test_api.py
@@ -0,0 +1,115 @@
+"""Tests for API routes."""
+
+import os
+import json
+from datetime import datetime
+from unittest import TestCase, mock
+
+import jsonschema
+
+from arxiv.users import helpers, auth
+from arxiv.users.domain import Scope
+from arxiv import status
+
+from search import factory
+from search import domain
+
+
+class TestAPISearchRequests(TestCase):
+    """Requests against the main search API."""
+
+    SCHEMA_PATH = os.path.abspath('schema/resources/DocumentSet.json')
+
+    def setUp(self):
+        """Instantiate and configure an API app."""
+        jwt_secret = 'foosecret'
+        os.environ['JWT_SECRET'] = jwt_secret
+        self.app = factory.create_api_web_app()
+        self.app.config['JWT_SECRET'] = jwt_secret
+        self.client = self.app.test_client()
+
+        with open(self.SCHEMA_PATH) as f:
+            self.schema = json.load(f)
+
+    def test_request_without_token(self):
+        """No auth token is provided on the request."""
+        response = self.client.get('/')
+        self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED)
+
+    def test_with_token_lacking_scope(self):
+        """Client auth token lacks required public read scope."""
+        token = helpers.generate_token('1234', 'foo@bar.com', 'foouser',
+                                       scope=[Scope('something', 'read')])
+        response = self.client.get('/', headers={'Authorization': token})
+        self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
+
+    @mock.patch(f'{factory.__name__}.api.api')
+    def test_with_valid_token(self, mock_api_controller):
+        """Client auth token has required public read scope."""
+        document = domain.Document(
+            submitted_date=datetime.now(),
+            submitted_date_first=datetime.now(),
+            announced_date_first=datetime.now(),
+            id='1234.5678',
+            abstract='very abstract',
+            authors=[
+                domain.Person(full_name='F. Bar', orcid='1234-5678-9012-3456')
+            ],
+            submitter=domain.Person(full_name='S. Ubmitter', author_id='su_1'),
+            modified_date=datetime.now(),
+            updated_date=datetime.now(),
+            is_current=True,
+            is_withdrawn=False,
+            license={
+                'uri': 'http://foo.license/1',
+                'label': 'Notalicense 5.4'
+            },
+            paper_id='1234.5678',
+            paper_id_v='1234.5678v6',
+            title='tiiiitle',
+            source={
+                'flags': 'A',
+                'format': 'pdftotex',
+                'size_bytes': 2
+            },
+            version=6,
+            latest='1234.5678v6',
+            latest_version=6,
+            report_num='somenum1',
+            msc_class=['c1'],
+            acm_class=['z2'],
+            journal_ref='somejournal (1991): 2-34',
+            doi='10.123456/7890',
+            comments='very science',
+            abs_categories='astro-ph.CO foo.BR',
+            formats=['pdf', 'other'],
+            primary_classification=domain.Classification(
+                group={'id': 'foo', 'name': 'Foo Group'},
+                archive={'id': 'foo', 'name': 'Foo Archive'},
+                category={'id': 'foo.BR', 'name': 'Foo Category'},
+            ),
+            secondary_classification=[
+                domain.Classification(
+                    group={'id': 'foo', 'name': 'Foo Group'},
+                    archive={'id': 'foo', 'name': 'Foo Archive'},
+                    category={'id': 'foo.BZ', 'name': 'Baz Category'},
+                )
+            ]
+        )
+        docs = domain.DocumentSet(
+            results=[document],
+            metadata={'start': 0, 'end': 1, 'size': 50, 'total': 1}
+        )
+        mock_api_controller.search.return_value = docs, status.HTTP_200_OK, {}
+        token = helpers.generate_token('1234', 'foo@bar.com', 'foouser',
+                                       scope=[auth.scopes.READ_PUBLIC])
+        response = self.client.get('/', headers={'Authorization': token})
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+
+        data = json.loads(response.data)
+        res = jsonschema.RefResolver(
+            'file://%s/' % os.path.abspath(os.path.dirname(self.SCHEMA_PATH)),
+            None
+        )
+        self.assertIsNone(jsonschema.validate(data, self.schema, resolver=res),
+                          'Response content is valid per schema')

From 45d8cbc1f1e9d75ede07a06c8a9e3b67dd4c2ee2 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Tue, 18 Sep 2018 14:44:52 -0400
Subject: [PATCH 15/77] minor: add Dockerfile-api and wsgi-api.py for the search API

---
 Dockerfile     |  1 -
 Dockerfile-api | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++
 wsgi-api.py    | 13 +++++++++++
 3 files changed, 72 insertions(+), 1 deletion(-)
 create mode 100644 Dockerfile-api
 create mode 100644 wsgi-api.py

diff --git a/Dockerfile b/Dockerfile
index 9ddc3a46..f9bbd0c9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -8,7 +8,6 @@ FROM arxiv/base:0.6.1
 WORKDIR /opt/arxiv
 
 # Add Python application and configuration.
-ADD requirements/prod.txt /opt/arxiv/requirements.txt
 ADD app.py /opt/arxiv/
 ADD Pipfile /opt/arxiv/
 ADD Pipfile.lock /opt/arxiv/
diff --git a/Dockerfile-api b/Dockerfile-api
new file mode 100644
index 00000000..48e49dd3
--- /dev/null
+++ b/Dockerfile-api
@@ -0,0 +1,59 @@
+# arxiv/search-api
+#
+# Defines the runtime for the arXiv search API, which provides a metadata
+# query API backed by Elasticsearch.
+
+FROM arxiv/base:0.6.1
+
+WORKDIR /opt/arxiv
+
+# Install MySQL.
+RUN yum install -y which mysql mysql-devel
+
+# Add Python application and configuration.
+ADD app.py /opt/arxiv/
+ADD Pipfile /opt/arxiv/
+ADD Pipfile.lock /opt/arxiv/
+RUN pip install -U pip pipenv
+RUN pipenv install
+
+ENV PATH "/opt/arxiv:${PATH}"
+
+ADD schema /opt/arxiv/schema
+ADD mappings /opt/arxiv/mappings
+ADD search /opt/arxiv/search
+ADD wsgi-api.py /opt/arxiv/wsgi.py
+RUN pip install uwsgi
+
+ADD bin/start_search.sh /opt/arxiv/
+RUN chmod +x /opt/arxiv/start_search.sh
+
+ENV LC_ALL en_US.utf8
+ENV LANG en_US.utf8
+ENV LOGLEVEL 40
+ENV FLASK_DEBUG 1
+ENV FLASK_APP /opt/arxiv/app.py
+
+ENV ELASTICSEARCH_SERVICE_HOST 127.0.0.1
+ENV ELASTICSEARCH_SERVICE_PORT 9200
+ENV ELASTICSEARCH_PORT_9200_PROTO http
+ENV ELASTICSEARCH_INDEX arxiv
+ENV ELASTICSEARCH_USER elastic
+ENV ELASTICSEARCH_PASSWORD changeme
+ENV METADATA_ENDPOINT https://arxiv.org/docmeta_bulk/
+
+EXPOSE 8000
+
+#CMD /bin/bash
+ENTRYPOINT ["/opt/arxiv/start_search.sh"]
+CMD ["--http-socket", ":8000", \
+     "-M", \
+     "-t 3000", \
+     "--manage-script-name", \
+     "--processes", "8", \
+     "--threads", "1", \
+     "--async", "100", \
+     "--ugreen", \
+     "--buffer-size", "65535", \
+     "--mount", "/metadata=wsgi.py", \
+     "--logformat", "%(addr) %(addr) - %(user_id)|%(session_id) [%(rtime)] [%(uagent)] \"%(method) %(uri) %(proto)\" %(status) %(size) %(micros) %(ttfb)"]
diff --git a/wsgi-api.py b/wsgi-api.py
new file mode 100644
index 00000000..fc0eb484
--- /dev/null
+++ b/wsgi-api.py
@@ -0,0 +1,13 @@
+"""Web Server Gateway Interface entry-point."""
+
+from search.factory import create_api_web_app
+import os
+
+
+def application(environ, start_response):
+    """Export str WSGI environ values to os.environ, then run the API app."""
+    for key, value in environ.items():
+        if isinstance(value, str):  # skip non-str WSGI entries (streams etc.)
+            os.environ[key] = value
+    app = create_api_web_app()
+    return app(environ, start_response)

From 57da6f594730fe9121cd00e2db98dda8824311f5 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Tue, 18 Sep 2018 15:36:21 -0400
Subject: [PATCH 16/77] updated docs

---
 docs/source/api/search.agent.consumer.rst     |   7 +
 docs/source/api/search.agent.rst              |  22 ++-
 docs/source/api/search.agent.tests.rst        |  16 +++
 .../search.agent.tests.test_integration.rst   |   7 +
 ...arch.agent.tests.test_record_processor.rst |   7 +
 docs/source/api/search.config.rst             |   7 +
 docs/source/api/search.context.rst            |   7 +
 .../api/search.controllers.advanced.forms.rst |   7 +
 .../api/search.controllers.advanced.rst       |  10 +-
 .../api/search.controllers.advanced.tests.rst |   7 +
 docs/source/api/search.controllers.api.rst    |   8 ++
 docs/source/api/search.controllers.rst        |  12 +-
 .../api/search.controllers.simple.forms.rst   |   7 +
 docs/source/api/search.controllers.simple.rst |  10 +-
 .../api/search.controllers.simple.tests.rst   |   7 +
 docs/source/api/search.controllers.tests.rst  |   7 +
 docs/source/api/search.controllers.util.rst   |   7 +
 docs/source/api/search.converters.rst         |   7 +
 docs/source/api/search.domain.advanced.rst    |   7 +
 docs/source/api/search.domain.api.rst         |   7 +
 docs/source/api/search.domain.base.rst        |   7 +
 docs/source/api/search.domain.rst             |  27 +---
 docs/source/api/search.encode.rst             |   7 +
 docs/source/api/search.factory.rst            |   7 +
 docs/source/api/search.process.rst            |  10 +-
 docs/source/api/search.process.tests.rst      |   7 +
 docs/source/api/search.process.transform.rst  |   7 +
 .../api/search.routes.api.exceptions.rst      |   7 +
 docs/source/api/search.routes.api.rst         |  23 ++++
 .../api/search.routes.api.serialize.rst       |   7 +
 docs/source/api/search.routes.api.tests.rst   |  16 +++
 .../api/search.routes.api.tests.test_api.rst  |   7 +
 ...search.routes.api.tests.test_serialize.rst |   7 +
 docs/source/api/search.routes.rst             |  16 ++-
 docs/source/api/search.routes.ui.rst          |   7 +
 docs/source/api/search.rst                    |  54 +-------
 docs/source/api/search.services.fulltext.rst  |   7 +
 .../api/search.services.index.advanced.rst    |   7 +
 .../api/search.services.index.authors.rst     |   7 +
 .../api/search.services.index.exceptions.rst  |   7 +
 .../search.services.index.highlighting.rst    |   7 +
 .../api/search.services.index.prepare.rst     |   7 +
 .../api/search.services.index.results.rst     |   7 +
 docs/source/api/search.services.index.rst     |  29 ++++
 .../api/search.services.index.simple.rst      |   7 +
 .../api/search.services.index.tests.rst       |  18 +++
 ...arch.services.index.tests.test_reindex.rst |   7 +
 ...arch.services.index.tests.test_results.rst |   7 +
 .../search.services.index.tests.test_util.rst |   7 +
 .../api/search.services.index.tests.tests.rst |   7 +
 .../source/api/search.services.index.util.rst |   7 +
 docs/source/api/search.services.metadata.rst  |   7 +
 docs/source/api/search.services.rst           |  32 ++---
 docs/source/api/search.services.tests.rst     |  16 +++
 .../search.services.tests.test_fulltext.rst   |   7 +
 .../search.services.tests.test_metadata.rst   |   7 +
 docs/source/api/search.tests.rst              |  16 +++
 .../api/search.tests.test_advanced_search.rst |   7 +
 .../search.tests.test_param_persistence.rst   |   7 +
 docs/source/architecture.rst                  | 126 +++---------------
 docs/source/index.rst                         |   2 +
 docs/source/search_api.rst                    |  36 +++++
 docs/source/search_ui.rst                     |  67 ++++++++++
 search/routes/api/serialize.py                |  23 ++--
 64 files changed, 624 insertions(+), 259 deletions(-)
 create mode 100644 docs/source/api/search.agent.consumer.rst
 create mode 100644 docs/source/api/search.agent.tests.rst
 create mode 100644 docs/source/api/search.agent.tests.test_integration.rst
 create mode 100644 docs/source/api/search.agent.tests.test_record_processor.rst
 create mode 100644 docs/source/api/search.config.rst
 create mode 100644 docs/source/api/search.context.rst
 create mode 100644 docs/source/api/search.controllers.advanced.forms.rst
 create mode 100644 docs/source/api/search.controllers.advanced.tests.rst
 create mode 100644 docs/source/api/search.controllers.api.rst
 create mode 100644 docs/source/api/search.controllers.simple.forms.rst
 create mode 100644 docs/source/api/search.controllers.simple.tests.rst
 create mode 100644 docs/source/api/search.controllers.tests.rst
 create mode 100644 docs/source/api/search.controllers.util.rst
 create mode 100644 docs/source/api/search.converters.rst
 create mode 100644 docs/source/api/search.domain.advanced.rst
 create mode 100644 docs/source/api/search.domain.api.rst
 create mode 100644 docs/source/api/search.domain.base.rst
 create mode 100644 docs/source/api/search.encode.rst
 create mode 100644 docs/source/api/search.factory.rst
 create mode 100644 docs/source/api/search.process.tests.rst
 create mode 100644 docs/source/api/search.process.transform.rst
 create mode 100644 docs/source/api/search.routes.api.exceptions.rst
 create mode 100644 docs/source/api/search.routes.api.rst
 create mode 100644 docs/source/api/search.routes.api.serialize.rst
 create mode 100644 docs/source/api/search.routes.api.tests.rst
 create mode 100644 docs/source/api/search.routes.api.tests.test_api.rst
 create mode 100644 docs/source/api/search.routes.api.tests.test_serialize.rst
 create mode 100644 docs/source/api/search.routes.ui.rst
 create mode 100644 docs/source/api/search.services.fulltext.rst
 create mode 100644 docs/source/api/search.services.index.advanced.rst
 create mode 100644 docs/source/api/search.services.index.authors.rst
 create mode 100644 docs/source/api/search.services.index.exceptions.rst
 create mode 100644 docs/source/api/search.services.index.highlighting.rst
 create mode 100644 docs/source/api/search.services.index.prepare.rst
 create mode 100644 docs/source/api/search.services.index.results.rst
 create mode 100644 docs/source/api/search.services.index.rst
 create mode 100644 docs/source/api/search.services.index.simple.rst
 create mode 100644 docs/source/api/search.services.index.tests.rst
 create mode 100644 docs/source/api/search.services.index.tests.test_reindex.rst
 create mode 100644 docs/source/api/search.services.index.tests.test_results.rst
 create mode 100644 docs/source/api/search.services.index.tests.test_util.rst
 create mode 100644 docs/source/api/search.services.index.tests.tests.rst
 create mode 100644 docs/source/api/search.services.index.util.rst
 create mode 100644 docs/source/api/search.services.metadata.rst
 create mode 100644 docs/source/api/search.services.tests.rst
 create mode 100644 docs/source/api/search.services.tests.test_fulltext.rst
 create mode 100644 docs/source/api/search.services.tests.test_metadata.rst
 create mode 100644 docs/source/api/search.tests.rst
 create mode 100644 docs/source/api/search.tests.test_advanced_search.rst
 create mode 100644 docs/source/api/search.tests.test_param_persistence.rst
 create mode 100644 docs/source/search_api.rst
 create mode 100644 docs/source/search_ui.rst

diff --git a/docs/source/api/search.agent.consumer.rst b/docs/source/api/search.agent.consumer.rst
new file mode 100644
index 00000000..e5003bcb
--- /dev/null
+++ b/docs/source/api/search.agent.consumer.rst
@@ -0,0 +1,7 @@
+search.agent.consumer module
+============================
+
+.. automodule:: search.agent.consumer
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.agent.rst b/docs/source/api/search.agent.rst
index e586b80b..5f56ca71 100644
--- a/docs/source/api/search.agent.rst
+++ b/docs/source/api/search.agent.rst
@@ -6,23 +6,17 @@ search.agent package
     :undoc-members:
     :show-inheritance:
 
-Submodules
-----------
+Subpackages
+-----------
 
-search.agent.base module
-------------------------
+.. toctree::
 
-.. automodule:: search.agent.base
-    :members:
-    :undoc-members:
-    :show-inheritance:
+    search.agent.tests
 
-search.agent.consumer module
-----------------------------
+Submodules
+----------
 
-.. automodule:: search.agent.consumer
-    :members:
-    :undoc-members:
-    :show-inheritance:
+.. toctree::
 
+   search.agent.consumer
 
diff --git a/docs/source/api/search.agent.tests.rst b/docs/source/api/search.agent.tests.rst
new file mode 100644
index 00000000..1434db36
--- /dev/null
+++ b/docs/source/api/search.agent.tests.rst
@@ -0,0 +1,16 @@
+search.agent.tests package
+==========================
+
+.. automodule:: search.agent.tests
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+Submodules
+----------
+
+.. toctree::
+
+   search.agent.tests.test_integration
+   search.agent.tests.test_record_processor
+
diff --git a/docs/source/api/search.agent.tests.test_integration.rst b/docs/source/api/search.agent.tests.test_integration.rst
new file mode 100644
index 00000000..a48b0658
--- /dev/null
+++ b/docs/source/api/search.agent.tests.test_integration.rst
@@ -0,0 +1,7 @@
+search.agent.tests.test\_integration module
+===========================================
+
+.. automodule:: search.agent.tests.test_integration
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.agent.tests.test_record_processor.rst b/docs/source/api/search.agent.tests.test_record_processor.rst
new file mode 100644
index 00000000..fecb043f
--- /dev/null
+++ b/docs/source/api/search.agent.tests.test_record_processor.rst
@@ -0,0 +1,7 @@
+search.agent.tests.test\_record\_processor module
+=================================================
+
+.. automodule:: search.agent.tests.test_record_processor
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.config.rst b/docs/source/api/search.config.rst
new file mode 100644
index 00000000..d73af70a
--- /dev/null
+++ b/docs/source/api/search.config.rst
@@ -0,0 +1,7 @@
+search.config module
+====================
+
+.. automodule:: search.config
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.context.rst b/docs/source/api/search.context.rst
new file mode 100644
index 00000000..8491c8cc
--- /dev/null
+++ b/docs/source/api/search.context.rst
@@ -0,0 +1,7 @@
+search.context module
+=====================
+
+.. automodule:: search.context
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.controllers.advanced.forms.rst b/docs/source/api/search.controllers.advanced.forms.rst
new file mode 100644
index 00000000..d60013e3
--- /dev/null
+++ b/docs/source/api/search.controllers.advanced.forms.rst
@@ -0,0 +1,7 @@
+search.controllers.advanced.forms module
+========================================
+
+.. automodule:: search.controllers.advanced.forms
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.controllers.advanced.rst b/docs/source/api/search.controllers.advanced.rst
index 8e0b6696..6d3f66f2 100644
--- a/docs/source/api/search.controllers.advanced.rst
+++ b/docs/source/api/search.controllers.advanced.rst
@@ -9,12 +9,8 @@ search.controllers.advanced package
 Submodules
 ----------
 
-search.controllers.advanced.forms module
-----------------------------------------
-
-.. automodule:: search.controllers.advanced.forms
-    :members:
-    :undoc-members:
-    :show-inheritance:
+.. toctree::
 
+   search.controllers.advanced.forms
+   search.controllers.advanced.tests
 
diff --git a/docs/source/api/search.controllers.advanced.tests.rst b/docs/source/api/search.controllers.advanced.tests.rst
new file mode 100644
index 00000000..f563dfd1
--- /dev/null
+++ b/docs/source/api/search.controllers.advanced.tests.rst
@@ -0,0 +1,7 @@
+search.controllers.advanced.tests module
+========================================
+
+.. automodule:: search.controllers.advanced.tests
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.controllers.api.rst b/docs/source/api/search.controllers.api.rst
new file mode 100644
index 00000000..81ce16b5
--- /dev/null
+++ b/docs/source/api/search.controllers.api.rst
@@ -0,0 +1,8 @@
+search.controllers.api package
+==============================
+
+.. automodule:: search.controllers.api
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
diff --git a/docs/source/api/search.controllers.rst b/docs/source/api/search.controllers.rst
index 21ac25be..90eb38db 100644
--- a/docs/source/api/search.controllers.rst
+++ b/docs/source/api/search.controllers.rst
@@ -12,18 +12,14 @@ Subpackages
 .. toctree::
 
     search.controllers.advanced
-    search.controllers.authors
+    search.controllers.api
     search.controllers.simple
 
 Submodules
 ----------
 
-search.controllers.util module
-------------------------------
-
-.. automodule:: search.controllers.util
-    :members:
-    :undoc-members:
-    :show-inheritance:
+.. toctree::
 
+   search.controllers.tests
+   search.controllers.util
 
diff --git a/docs/source/api/search.controllers.simple.forms.rst b/docs/source/api/search.controllers.simple.forms.rst
new file mode 100644
index 00000000..676a5e0a
--- /dev/null
+++ b/docs/source/api/search.controllers.simple.forms.rst
@@ -0,0 +1,7 @@
+search.controllers.simple.forms module
+======================================
+
+.. automodule:: search.controllers.simple.forms
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.controllers.simple.rst b/docs/source/api/search.controllers.simple.rst
index 4d85e87d..8a5c6bc5 100644
--- a/docs/source/api/search.controllers.simple.rst
+++ b/docs/source/api/search.controllers.simple.rst
@@ -9,12 +9,8 @@ search.controllers.simple package
 Submodules
 ----------
 
-search.controllers.simple.forms module
---------------------------------------
-
-.. automodule:: search.controllers.simple.forms
-    :members:
-    :undoc-members:
-    :show-inheritance:
+.. toctree::
 
+   search.controllers.simple.forms
+   search.controllers.simple.tests
 
diff --git a/docs/source/api/search.controllers.simple.tests.rst b/docs/source/api/search.controllers.simple.tests.rst
new file mode 100644
index 00000000..6956a652
--- /dev/null
+++ b/docs/source/api/search.controllers.simple.tests.rst
@@ -0,0 +1,7 @@
+search.controllers.simple.tests module
+======================================
+
+.. automodule:: search.controllers.simple.tests
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.controllers.tests.rst b/docs/source/api/search.controllers.tests.rst
new file mode 100644
index 00000000..04407d04
--- /dev/null
+++ b/docs/source/api/search.controllers.tests.rst
@@ -0,0 +1,7 @@
+search.controllers.tests module
+===============================
+
+.. automodule:: search.controllers.tests
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.controllers.util.rst b/docs/source/api/search.controllers.util.rst
new file mode 100644
index 00000000..32286608
--- /dev/null
+++ b/docs/source/api/search.controllers.util.rst
@@ -0,0 +1,7 @@
+search.controllers.util module
+==============================
+
+.. automodule:: search.controllers.util
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.converters.rst b/docs/source/api/search.converters.rst
new file mode 100644
index 00000000..8ee1e63d
--- /dev/null
+++ b/docs/source/api/search.converters.rst
@@ -0,0 +1,7 @@
+search.converters module
+========================
+
+.. automodule:: search.converters
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.domain.advanced.rst b/docs/source/api/search.domain.advanced.rst
new file mode 100644
index 00000000..f2fbd960
--- /dev/null
+++ b/docs/source/api/search.domain.advanced.rst
@@ -0,0 +1,7 @@
+search.domain.advanced module
+=============================
+
+.. automodule:: search.domain.advanced
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.domain.api.rst b/docs/source/api/search.domain.api.rst
new file mode 100644
index 00000000..3390a2ba
--- /dev/null
+++ b/docs/source/api/search.domain.api.rst
@@ -0,0 +1,7 @@
+search.domain.api module
+========================
+
+.. automodule:: search.domain.api
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.domain.base.rst b/docs/source/api/search.domain.base.rst
new file mode 100644
index 00000000..3623beee
--- /dev/null
+++ b/docs/source/api/search.domain.base.rst
@@ -0,0 +1,7 @@
+search.domain.base module
+=========================
+
+.. automodule:: search.domain.base
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.domain.rst b/docs/source/api/search.domain.rst
index 165458be..113ec00a 100644
--- a/docs/source/api/search.domain.rst
+++ b/docs/source/api/search.domain.rst
@@ -9,28 +9,9 @@ search.domain package
 Submodules
 ----------
 
-search.domain.advanced module
------------------------------
-
-.. automodule:: search.domain.advanced
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-search.domain.author module
----------------------------
-
-.. automodule:: search.domain.author
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-search.domain.base module
--------------------------
-
-.. automodule:: search.domain.base
-    :members:
-    :undoc-members:
-    :show-inheritance:
+.. toctree::
 
+   search.domain.advanced
+   search.domain.api
+   search.domain.base
 
diff --git a/docs/source/api/search.encode.rst b/docs/source/api/search.encode.rst
new file mode 100644
index 00000000..4fedc406
--- /dev/null
+++ b/docs/source/api/search.encode.rst
@@ -0,0 +1,7 @@
+search.encode module
+====================
+
+.. automodule:: search.encode
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.factory.rst b/docs/source/api/search.factory.rst
new file mode 100644
index 00000000..80814f0d
--- /dev/null
+++ b/docs/source/api/search.factory.rst
@@ -0,0 +1,7 @@
+search.factory module
+=====================
+
+.. automodule:: search.factory
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.process.rst b/docs/source/api/search.process.rst
index df27115a..a8df3ddc 100644
--- a/docs/source/api/search.process.rst
+++ b/docs/source/api/search.process.rst
@@ -9,12 +9,8 @@ search.process package
 Submodules
 ----------
 
-search.process.transform module
--------------------------------
-
-.. automodule:: search.process.transform
-    :members:
-    :undoc-members:
-    :show-inheritance:
+.. toctree::
 
+   search.process.tests
+   search.process.transform
 
diff --git a/docs/source/api/search.process.tests.rst b/docs/source/api/search.process.tests.rst
new file mode 100644
index 00000000..956540ca
--- /dev/null
+++ b/docs/source/api/search.process.tests.rst
@@ -0,0 +1,7 @@
+search.process.tests module
+===========================
+
+.. automodule:: search.process.tests
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.process.transform.rst b/docs/source/api/search.process.transform.rst
new file mode 100644
index 00000000..e4858d04
--- /dev/null
+++ b/docs/source/api/search.process.transform.rst
@@ -0,0 +1,7 @@
+search.process.transform module
+===============================
+
+.. automodule:: search.process.transform
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.routes.api.exceptions.rst b/docs/source/api/search.routes.api.exceptions.rst
new file mode 100644
index 00000000..107ac73d
--- /dev/null
+++ b/docs/source/api/search.routes.api.exceptions.rst
@@ -0,0 +1,7 @@
+search.routes.api.exceptions module
+===================================
+
+.. automodule:: search.routes.api.exceptions
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.routes.api.rst b/docs/source/api/search.routes.api.rst
new file mode 100644
index 00000000..df729ea9
--- /dev/null
+++ b/docs/source/api/search.routes.api.rst
@@ -0,0 +1,23 @@
+search.routes.api package
+=========================
+
+.. automodule:: search.routes.api
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+Subpackages
+-----------
+
+.. toctree::
+
+    search.routes.api.tests
+
+Submodules
+----------
+
+.. toctree::
+
+   search.routes.api.exceptions
+   search.routes.api.serialize
+
diff --git a/docs/source/api/search.routes.api.serialize.rst b/docs/source/api/search.routes.api.serialize.rst
new file mode 100644
index 00000000..bc2ea25b
--- /dev/null
+++ b/docs/source/api/search.routes.api.serialize.rst
@@ -0,0 +1,7 @@
+search.routes.api.serialize module
+==================================
+
+.. automodule:: search.routes.api.serialize
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.routes.api.tests.rst b/docs/source/api/search.routes.api.tests.rst
new file mode 100644
index 00000000..2fdc0aba
--- /dev/null
+++ b/docs/source/api/search.routes.api.tests.rst
@@ -0,0 +1,16 @@
+search.routes.api.tests package
+===============================
+
+.. automodule:: search.routes.api.tests
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+Submodules
+----------
+
+.. toctree::
+
+   search.routes.api.tests.test_api
+   search.routes.api.tests.test_serialize
+
diff --git a/docs/source/api/search.routes.api.tests.test_api.rst b/docs/source/api/search.routes.api.tests.test_api.rst
new file mode 100644
index 00000000..aa9840dc
--- /dev/null
+++ b/docs/source/api/search.routes.api.tests.test_api.rst
@@ -0,0 +1,7 @@
+search.routes.api.tests.test\_api module
+========================================
+
+.. automodule:: search.routes.api.tests.test_api
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.routes.api.tests.test_serialize.rst b/docs/source/api/search.routes.api.tests.test_serialize.rst
new file mode 100644
index 00000000..15883f1a
--- /dev/null
+++ b/docs/source/api/search.routes.api.tests.test_serialize.rst
@@ -0,0 +1,7 @@
+search.routes.api.tests.test\_serialize module
+==============================================
+
+.. automodule:: search.routes.api.tests.test_serialize
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.routes.rst b/docs/source/api/search.routes.rst
index 5332161d..de6b7302 100644
--- a/docs/source/api/search.routes.rst
+++ b/docs/source/api/search.routes.rst
@@ -6,15 +6,17 @@ search.routes package
     :undoc-members:
     :show-inheritance:
 
+Subpackages
+-----------
+
+.. toctree::
+
+    search.routes.api
+
 Submodules
 ----------
 
-search.routes.ui module
------------------------
-
-.. automodule:: search.routes.ui
-    :members:
-    :undoc-members:
-    :show-inheritance:
+.. toctree::
 
+   search.routes.ui
 
diff --git a/docs/source/api/search.routes.ui.rst b/docs/source/api/search.routes.ui.rst
new file mode 100644
index 00000000..d69da82b
--- /dev/null
+++ b/docs/source/api/search.routes.ui.rst
@@ -0,0 +1,7 @@
+search.routes.ui module
+=======================
+
+.. automodule:: search.routes.ui
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.rst b/docs/source/api/search.rst
index db5b77c6..da532f7f 100644
--- a/docs/source/api/search.rst
+++ b/docs/source/api/search.rst
@@ -17,56 +17,16 @@ Subpackages
     search.process
     search.routes
     search.services
+    search.tests
 
 Submodules
 ----------
 
-search.config module
---------------------
-
-.. automodule:: search.config
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-search.context module
----------------------
-
-.. automodule:: search.context
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-search.converter module
------------------------
-
-.. automodule:: search.converter
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-search.factory module
----------------------
-
-.. automodule:: search.factory
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-search.logging module
----------------------
-
-.. automodule:: search.logging
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-search.util module
-------------------
-
-.. automodule:: search.util
-    :members:
-    :undoc-members:
-    :show-inheritance:
+.. toctree::
 
+   search.config
+   search.context
+   search.converters
+   search.encode
+   search.factory
 
diff --git a/docs/source/api/search.services.fulltext.rst b/docs/source/api/search.services.fulltext.rst
new file mode 100644
index 00000000..5a74cdfe
--- /dev/null
+++ b/docs/source/api/search.services.fulltext.rst
@@ -0,0 +1,7 @@
+search.services.fulltext module
+===============================
+
+.. automodule:: search.services.fulltext
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.services.index.advanced.rst b/docs/source/api/search.services.index.advanced.rst
new file mode 100644
index 00000000..7bad6d7d
--- /dev/null
+++ b/docs/source/api/search.services.index.advanced.rst
@@ -0,0 +1,7 @@
+search.services.index.advanced module
+=====================================
+
+.. automodule:: search.services.index.advanced
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.services.index.authors.rst b/docs/source/api/search.services.index.authors.rst
new file mode 100644
index 00000000..73bc1385
--- /dev/null
+++ b/docs/source/api/search.services.index.authors.rst
@@ -0,0 +1,7 @@
+search.services.index.authors module
+====================================
+
+.. automodule:: search.services.index.authors
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.services.index.exceptions.rst b/docs/source/api/search.services.index.exceptions.rst
new file mode 100644
index 00000000..c90e0a77
--- /dev/null
+++ b/docs/source/api/search.services.index.exceptions.rst
@@ -0,0 +1,7 @@
+search.services.index.exceptions module
+=======================================
+
+.. automodule:: search.services.index.exceptions
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.services.index.highlighting.rst b/docs/source/api/search.services.index.highlighting.rst
new file mode 100644
index 00000000..fcac46d3
--- /dev/null
+++ b/docs/source/api/search.services.index.highlighting.rst
@@ -0,0 +1,7 @@
+search.services.index.highlighting module
+=========================================
+
+.. automodule:: search.services.index.highlighting
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.services.index.prepare.rst b/docs/source/api/search.services.index.prepare.rst
new file mode 100644
index 00000000..4509ae16
--- /dev/null
+++ b/docs/source/api/search.services.index.prepare.rst
@@ -0,0 +1,7 @@
+search.services.index.prepare module
+====================================
+
+.. automodule:: search.services.index.prepare
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.services.index.results.rst b/docs/source/api/search.services.index.results.rst
new file mode 100644
index 00000000..95eb785a
--- /dev/null
+++ b/docs/source/api/search.services.index.results.rst
@@ -0,0 +1,7 @@
+search.services.index.results module
+====================================
+
+.. automodule:: search.services.index.results
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.services.index.rst b/docs/source/api/search.services.index.rst
new file mode 100644
index 00000000..979c0cbc
--- /dev/null
+++ b/docs/source/api/search.services.index.rst
@@ -0,0 +1,29 @@
+search.services.index package
+=============================
+
+.. automodule:: search.services.index
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+Subpackages
+-----------
+
+.. toctree::
+
+    search.services.index.tests
+
+Submodules
+----------
+
+.. toctree::
+
+   search.services.index.advanced
+   search.services.index.authors
+   search.services.index.exceptions
+   search.services.index.highlighting
+   search.services.index.prepare
+   search.services.index.results
+   search.services.index.simple
+   search.services.index.util
+
diff --git a/docs/source/api/search.services.index.simple.rst b/docs/source/api/search.services.index.simple.rst
new file mode 100644
index 00000000..b59f8929
--- /dev/null
+++ b/docs/source/api/search.services.index.simple.rst
@@ -0,0 +1,7 @@
+search.services.index.simple module
+===================================
+
+.. automodule:: search.services.index.simple
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.services.index.tests.rst b/docs/source/api/search.services.index.tests.rst
new file mode 100644
index 00000000..9479c75f
--- /dev/null
+++ b/docs/source/api/search.services.index.tests.rst
@@ -0,0 +1,18 @@
+search.services.index.tests package
+===================================
+
+.. automodule:: search.services.index.tests
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+Submodules
+----------
+
+.. toctree::
+
+   search.services.index.tests.test_reindex
+   search.services.index.tests.test_results
+   search.services.index.tests.test_util
+   search.services.index.tests.tests
+
diff --git a/docs/source/api/search.services.index.tests.test_reindex.rst b/docs/source/api/search.services.index.tests.test_reindex.rst
new file mode 100644
index 00000000..d89bc7b8
--- /dev/null
+++ b/docs/source/api/search.services.index.tests.test_reindex.rst
@@ -0,0 +1,7 @@
+search.services.index.tests.test\_reindex module
+================================================
+
+.. automodule:: search.services.index.tests.test_reindex
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.services.index.tests.test_results.rst b/docs/source/api/search.services.index.tests.test_results.rst
new file mode 100644
index 00000000..7bb1ba2b
--- /dev/null
+++ b/docs/source/api/search.services.index.tests.test_results.rst
@@ -0,0 +1,7 @@
+search.services.index.tests.test\_results module
+================================================
+
+.. automodule:: search.services.index.tests.test_results
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.services.index.tests.test_util.rst b/docs/source/api/search.services.index.tests.test_util.rst
new file mode 100644
index 00000000..cc535414
--- /dev/null
+++ b/docs/source/api/search.services.index.tests.test_util.rst
@@ -0,0 +1,7 @@
+search.services.index.tests.test\_util module
+=============================================
+
+.. automodule:: search.services.index.tests.test_util
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.services.index.tests.tests.rst b/docs/source/api/search.services.index.tests.tests.rst
new file mode 100644
index 00000000..b8721d9d
--- /dev/null
+++ b/docs/source/api/search.services.index.tests.tests.rst
@@ -0,0 +1,7 @@
+search.services.index.tests.tests module
+========================================
+
+.. automodule:: search.services.index.tests.tests
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.services.index.util.rst b/docs/source/api/search.services.index.util.rst
new file mode 100644
index 00000000..4ba90c85
--- /dev/null
+++ b/docs/source/api/search.services.index.util.rst
@@ -0,0 +1,7 @@
+search.services.index.util module
+=================================
+
+.. automodule:: search.services.index.util
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.services.metadata.rst b/docs/source/api/search.services.metadata.rst
new file mode 100644
index 00000000..93014a82
--- /dev/null
+++ b/docs/source/api/search.services.metadata.rst
@@ -0,0 +1,7 @@
+search.services.metadata module
+===============================
+
+.. automodule:: search.services.metadata
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.services.rst b/docs/source/api/search.services.rst
index c8661d72..b353333b 100644
--- a/docs/source/api/search.services.rst
+++ b/docs/source/api/search.services.rst
@@ -6,31 +6,19 @@ search.services package
     :undoc-members:
     :show-inheritance:
 
-Submodules
-----------
+Subpackages
+-----------
 
-search.services.fulltext module
--------------------------------
-
-.. automodule:: search.services.fulltext
-    :members:
-    :undoc-members:
-    :show-inheritance:
+.. toctree::
 
-search.services.index module
-----------------------------
+    search.services.index
+    search.services.tests
 
-.. automodule:: search.services.index
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-search.services.metadata module
--------------------------------
+Submodules
+----------
 
-.. automodule:: search.services.metadata
-    :members:
-    :undoc-members:
-    :show-inheritance:
+.. toctree::
 
+   search.services.fulltext
+   search.services.metadata
 
diff --git a/docs/source/api/search.services.tests.rst b/docs/source/api/search.services.tests.rst
new file mode 100644
index 00000000..8a059df1
--- /dev/null
+++ b/docs/source/api/search.services.tests.rst
@@ -0,0 +1,16 @@
+search.services.tests package
+=============================
+
+.. automodule:: search.services.tests
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+Submodules
+----------
+
+.. toctree::
+
+   search.services.tests.test_fulltext
+   search.services.tests.test_metadata
+
diff --git a/docs/source/api/search.services.tests.test_fulltext.rst b/docs/source/api/search.services.tests.test_fulltext.rst
new file mode 100644
index 00000000..c36d8420
--- /dev/null
+++ b/docs/source/api/search.services.tests.test_fulltext.rst
@@ -0,0 +1,7 @@
+search.services.tests.test\_fulltext module
+===========================================
+
+.. automodule:: search.services.tests.test_fulltext
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.services.tests.test_metadata.rst b/docs/source/api/search.services.tests.test_metadata.rst
new file mode 100644
index 00000000..4b654897
--- /dev/null
+++ b/docs/source/api/search.services.tests.test_metadata.rst
@@ -0,0 +1,7 @@
+search.services.tests.test\_metadata module
+===========================================
+
+.. automodule:: search.services.tests.test_metadata
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.tests.rst b/docs/source/api/search.tests.rst
new file mode 100644
index 00000000..d2022adc
--- /dev/null
+++ b/docs/source/api/search.tests.rst
@@ -0,0 +1,16 @@
+search.tests package
+====================
+
+.. automodule:: search.tests
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+Submodules
+----------
+
+.. toctree::
+
+   search.tests.test_advanced_search
+   search.tests.test_param_persistence
+
diff --git a/docs/source/api/search.tests.test_advanced_search.rst b/docs/source/api/search.tests.test_advanced_search.rst
new file mode 100644
index 00000000..5d4b705e
--- /dev/null
+++ b/docs/source/api/search.tests.test_advanced_search.rst
@@ -0,0 +1,7 @@
+search.tests.test\_advanced\_search module
+==========================================
+
+.. automodule:: search.tests.test_advanced_search
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/api/search.tests.test_param_persistence.rst b/docs/source/api/search.tests.test_param_persistence.rst
new file mode 100644
index 00000000..a29664ed
--- /dev/null
+++ b/docs/source/api/search.tests.test_param_persistence.rst
@@ -0,0 +1,7 @@
+search.tests.test\_param\_persistence module
+============================================
+
+.. automodule:: search.tests.test_param_persistence
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/source/architecture.rst b/docs/source/architecture.rst
index 068da8ef..b6877770 100644
--- a/docs/source/architecture.rst
+++ b/docs/source/architecture.rst
@@ -1,70 +1,5 @@
-arXiv Search
-************
-
-The current version of the arXiv search application is designed to meet the
-goals outlined in arXiv-NG milestone H1: Replace Legacy Search.
-
-- H1.1. Replace the current advanced search interface, search results, and
-  search by author name.
-- H1.2. The search result view should support pagination, and ordering by
-  publication date or relevance.
-- H1.3. An indexing agent updates the search index at publication time in
-  response to a Kinesis notification, using metadata from the docmeta endpoint
-  in the classic system.
-
-Key Requirements
-================
-
-- Simple search:
-
-  - Users should be able to search for arXiv papers by title, author, and
-    abstract.
-  - Searches can originate from any part of the arXiv.org site, via the
-    search bar in the site header.
-
-- Advanced search:
-
-  - Users can search for papers using boolean combinations of search terms on
-    title, author names, and/or abstract.
-  - Users can filter results by primary classification, and submission date.
-  - Submission date supports prior year, specific year, and date range.
-
-- Author name search:
-
-  - Users should be able to search for papers by author name.
-  - This should support queries originating on the abs page, and in search
-    results.
-
-- UI: The overall flavor of the search views should be substantially
-  similar to the classic views, but with styling improvements that improve
-  readability, usability, and accessibility.
-
-Quality Goals
-=============
-- Code quality:
-
-  - 90% test coverage on Python components that we develop/control.
-  - Linting: ``pylint`` passes with >= 9/10.
-  - Documentation: ``pydocstyle`` passes.
-  - Static checking: ``mypy`` passes.
-
-- Performance & reliability:
-
-  - Response time: 99% of requests have a latency of 1 second or less.
-  - Error rate: parity with classic search.
-  - Request rate: support request volume of existing search * safety factor 3.
-
-- Accessibility: meet or exceed WCAG 2.0 level A for accessibility.
-
-Constraints
-===========
-- Must be implemented in Python/Flask, and be deployable behind Apache as a
-  Python/WSGI application.
-- The search application itself must be stateless. It must be able to connect
-  to an arbitrary ElasticSearch cluster, which can be specified via
-  configuration.
-- Notifications about new content are delivered via the Kinesis notification
-  broker.
+Architectural overview
+**********************
 
 Context
 =======
@@ -82,7 +17,7 @@ arxiv.org pages, or by clicking on the name of an author on the abstract page
 or other listings.
 
 The search system draws content from the core metadata repository (currently
-the classic arXiv application, via the ``/docmeta`` endpoint), and (future)
+the classic arXiv application, via the ``docmeta`` endpoint), and (future)
 from the fulltext extraction service.
 
 Notifications about new content are disseminated by the publication system via
@@ -107,9 +42,10 @@ Containers
 The core of the search system is an ElasticSearch cluster, provided by the `AWS
 Elasticsearch Service <https://aws.amazon.com/elasticsearch-service/>`_.
 
-The search service, implemented in Python/Flask, provides both the user-facing
-interfaces as well as a REST API. The search service is only responsible for
-reading from ElasticSearch.
+The search application, implemented in Python/Flask, provides both the
+user-facing interfaces as well as a REST API. These two sets of endpoints
+(:mod:`search.routes.ui` and :mod:`search.routes.api`) are deployed as two
+separate services.
 
 The indexing agent application is responsible for coordinating updates to the
 ElasticSearch cluster. The agent subscribes to notifications about the
@@ -119,8 +55,8 @@ repository and the plain text store (future), transforms those data into a
 search document, and sends that document to ES.
 
 
-Components: Search service
-==========================
+Components: Search UI service
+==============================
 
 .. _figure-ng-search-application-components:
 
@@ -129,19 +65,17 @@ Components: Search service
 
    Components of the search service.
 
-The search service is a Flask application that handles user/client requests to
-search arXiv papers.
+The search UI service is a Flask application that handles user/client requests
+to search arXiv papers.
 
-The entry-point to the application is the application factory module, which
-provides :func:`search.factory.create_ui_web_app`. That application factory
-function attaches templates and static files from `arxiv-base
-<https://github.com/cul-it/arxiv-base>`_ for use in search-specific templates,
+The entry-point to the application is :func:`search.factory.create_ui_web_app`.
+That application factory function attaches templates and static files from
+:mod:`arxiv.base` for use in search-specific templates,
 and attaches the routing blueprint provided by :mod:`search.routes.ui`.
 
-:mod:`search.routes.ui` routes parameters from incoming requests to the three
+:mod:`search.routes.ui` routes parameters from incoming requests to the two
 main search controllers:
 
-- :mod:`search.controllers.authors`
 - :mod:`search.controllers.simple`
 - :mod:`search.controllers.advanced`
 
@@ -150,30 +84,10 @@ use-cases in one controller, because we expect user interface methods to change
 significantly as enhanced features are introduced. Each controller module
 implements its own classes for form handling and validation.
 
-
-
 Components: Indexing agent
 ==========================
-
-.. _figure-ng-search-indexing-agent-components:
-
-.. figure:: _static/diagrams/ng-search-indexing-agent-components.png
-   :target: _static/diagrams/ng-search-indexing-agent-components.png
-
-   Components view of the search agent.
-
-Notification handling is provided by two components: a notification consumer
-provided by Amazon, implemented using the Java-based Kinesis Consumer
-Library, and a record processor component implemented in Python that
-processes new notifications received by the consumer. A so-called
-MultiLangDaemon, a stand-alone Java process, provides the glue between the
-KCL and our record processor. When new notifications are received by the
-consumer, the MultiLangDaemon invokes the record processor, which in turn
-starts the processing pipeline.
-
-The :class:`search.agent.consumer.MetadataRecordProcessor` implements the
-indexing procedure for each notification. The
-:meth:`search.agent.consumer.MetadataRecordProcessor.process_record` method
-coordinates retrieval of metadata from the docmeta endpoint (classic),
-transformation of those content into a search document, and updating
-ElasticSearch.
+Notifications about new document metadata are handled by :mod:`search.agent`,
+implemented using :mod:`arxiv.base.agent`. The agent coordinates retrieval of
+metadata from the docmeta endpoint (classic), transforms that content
+into a search document using :mod:`search.process.transform`, and updates
+ElasticSearch using :mod:`search.services.index`.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 357345cc..1f31afe3 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -6,6 +6,8 @@ arXiv Search System Documentation
    :caption: Contents:
 
    architecture.rst
+   search_ui.rst
+   search_api.rst
    api/modules.rst
 
 
diff --git a/docs/source/search_api.rst b/docs/source/search_api.rst
new file mode 100644
index 00000000..91e81550
--- /dev/null
+++ b/docs/source/search_api.rst
@@ -0,0 +1,36 @@
+Search API (Alpha)
+******************
+
+Release ``0.5.0-alpha`` introduces support for a metadata search API service.
+This release targets milestone H2: Search API, with the following specific
+goals:
+
+- H2.1: A search API is exposed via the API gateway, offering feature parity
+  with the classic "arXiv API".
+
+  - Consider content negotiation to support legacy XML and JSON(-LD).
+
+- H2.2: Opportunistic improvements, fixes, e.g. proper handling of UTF-8
+  characters (ARXIVNG-257).
+- H2.3: Deprecate classic arXiv API.
+
+
+The current release supports only JSON serialization, provided by
+:class:`search.routes.api.serialize.JSONSerializer`. An Atom/XML serializer
+:class:`search.routes.api.serialize.AtomXMLSerializer` is planned but not yet
+implemented.
+
+A formal description of the API (OpenAPI 3.0) and resources (JSON Schema) can
+be found at `https://github.com/cul-it/arxiv-search/tree/master/schema`_.
+
+The service endpoints are defined in :mod:`search.routes.api`:
+
+- The root endpoint :func:`search.routes.api.search` supports queries using the
+  same semantics as the advanced search UI.
+- The paper metadata endpoint :func:`search.routes.api.paper` provides more
+  detailed metadata for a specific arXiv e-print.
+
+Requests are handled by the controllers in :mod:`search.controllers.api`. As of
+this writing, the :class:`search.domain.api.APIQuery` domain class is identical
+to :class:`search.domain.api.AdvancedQuery`, but this may change in future
+versions.
diff --git a/docs/source/search_ui.rst b/docs/source/search_ui.rst
new file mode 100644
index 00000000..581b50e2
--- /dev/null
+++ b/docs/source/search_ui.rst
@@ -0,0 +1,67 @@
+Search Interface
+****************
+
+The current version of the arXiv search application is designed to meet the
+goals outlined in arXiv-NG milestone H1: Replace Legacy Search.
+
+- H1.1. Replace the current advanced search interface, search results, and
+  search by author name.
+- H1.2. The search result view should support pagination, and ordering by
+  publication date or relevance.
+- H1.3. An indexing agent updates the search index at publication time in
+  response to a Kinesis notification, using metadata from the docmeta endpoint
+  in the classic system.
+
+Key Requirements
+================
+
+- Simple search:
+
+  - Users should be able to search for arXiv papers by title, author, and
+    abstract.
+  - Searches can originate from any part of the arXiv.org site, via the
+    search bar in the site header.
+
+- Advanced search:
+
+  - Users can search for papers using boolean combinations of search terms on
+    title, author names, and/or abstract.
+  - Users can filter results by primary classification, and submission date.
+  - Submission date supports prior year, specific year, and date range.
+
+- Author name search:
+
+  - Users should be able to search for papers by author name.
+  - This should support queries originating on the abs page, and in search
+    results.
+
+- UI: The overall flavor of the search views should be substantially
+  similar to the classic views, but with styling that improves
+  readability, usability, and accessibility.
+
+Quality Goals
+=============
+- Code quality:
+
+  - 90% test coverage on Python components that we develop/control.
+  - Linting: ``pylint`` passes with >= 9/10.
+  - Documentation: ``pydocstyle`` passes.
+  - Static checking: ``mypy`` passes.
+
+- Performance & reliability:
+
+  - Response time: 99% of requests have a latency of 1 second or less.
+  - Error rate: parity with classic search.
+  - Request rate: support request volume of existing search * safety factor 3.
+
+- Accessibility: meet or exceed WCAG 2.0 level A for accessibility.
+
+Constraints
+===========
+- Must be implemented in Python/Flask, and be deployable behind Apache as a
+  Python/WSGI application.
+- The search application itself must be stateless. It must be able to connect
+  to an arbitrary ElasticSearch cluster, which can be specified via
+  configuration.
+- Notifications about new content are delivered via the Kinesis notification
+  broker.
diff --git a/search/routes/api/serialize.py b/search/routes/api/serialize.py
index 70e325c6..05d2f395 100644
--- a/search/routes/api/serialize.py
+++ b/search/routes/api/serialize.py
@@ -142,17 +142,18 @@ def as_json(document_or_set: Union[DocumentSet, Document]) -> str:
     return JSONSerializer.serialize_document(document_or_set)  # type: ignore
 
 
-# class AtomXMLSerializer(BaseSerializer):
-#     """."""
-#
-#     ATOM = "http://www.w3.org/2005/Atom"
-#     OPENSEARCH = "http://a9.com/-/spec/opensearch/1.1/"
-#     ARXIV = "http://arxiv.org/schemas/atom"
-#     NSMAP = {
-#         None: ATOM,
-#         "opensearch": OPENSEARCH,
-#         "arxiv": ARXIV
-#     }
+# TODO: implement me!
+class AtomXMLSerializer(BaseSerializer):
+    """Atom XML serializer for paper metadata."""
+
+    ATOM = "http://www.w3.org/2005/Atom"
+    OPENSEARCH = "http://a9.com/-/spec/opensearch/1.1/"
+    ARXIV = "http://arxiv.org/schemas/atom"
+    NSMAP = {
+        None: ATOM,
+        "opensearch": OPENSEARCH,
+        "arxiv": ARXIV
+    }
 #     fields = {
 #         'title': '{%s}title' % ATOM,
 #         'id': '{%s}id' % ATOM,

From 73f27faa28feb9384fbdac191e6d4f15a09dba42 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Tue, 18 Sep 2018 15:43:05 -0400
Subject: [PATCH 17/77] updated docs

---
 Pipfile                                       |   3 +
 Pipfile.lock                                  | 122 +++++++++++++++++-
 docs/requirements.txt                         |   3 -
 .../source/api/search.controllers.authors.rst |  20 ---
 docs/source/architecture.rst                  |  12 +-
 docs/source/conf.py                           |  10 +-
 docs/source/search_api.rst                    |   2 +-
 7 files changed, 140 insertions(+), 32 deletions(-)
 delete mode 100644 docs/requirements.txt
 delete mode 100644 docs/source/api/search.controllers.authors.rst

diff --git a/Pipfile b/Pipfile
index 45d2e0e2..d4a37c0e 100644
--- a/Pipfile
+++ b/Pipfile
@@ -52,3 +52,6 @@ lxml = "*"
 
 [dev-packages]
 coveralls = "*"
+sphinx = "*"
+sphinxcontrib-websupport = "*"
+sphinx-autodoc-typehints = "*"
diff --git a/Pipfile.lock b/Pipfile.lock
index 162ea5be..cb035726 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "b56d17953fa137da21598148d166019ec6ddb84cd8398580351e01275c6a94c7"
+            "sha256": "f14dcfd9ec69893845bb7478b52f195e7afe2dffc3c667d277e87afc12ec5dd3"
         },
         "pipfile-spec": 6,
         "requires": {},
@@ -539,6 +539,20 @@
         }
     },
     "develop": {
+        "alabaster": {
+            "hashes": [
+                "sha256:674bb3bab080f598371f4443c5008cbfeb1a5e622dd312395d2d82af2c54c456",
+                "sha256:b63b1f4dc77c074d386752ec4a8a7517600f6c0db8cd42980cae17ab7b3275d7"
+            ],
+            "version": "==0.7.11"
+        },
+        "babel": {
+            "hashes": [
+                "sha256:6778d85147d5d85345c14a26aada5e478ab04e39b078b0745ee6870c2b5cf669",
+                "sha256:8cba50f48c529ca3fa18cf81fa9403be176d374ac4d60738b839122dfaaa3d23"
+            ],
+            "version": "==2.6.0"
+        },
         "certifi": {
             "hashes": [
                 "sha256:40523d2efb60523e113b44602298f0960e900388cf3bb6043f645cf57ea9e3f5",
@@ -615,6 +629,15 @@
             ],
             "version": "==0.6.2"
         },
+        "docutils": {
+            "hashes": [
+                "sha256:02aec4bd92ab067f6ff27a38a38a41173bf01bed8f89157768c1573f53e474a6",
+                "sha256:51e64ef2ebfb29cae1faa133b3710143496eca21c530f3f71424d77687764274",
+                "sha256:7a4bd47eaf6596e1295ecb11361139febe29b084a87bf005bf899f9a42edc3c6"
+            ],
+            "index": "pypi",
+            "version": "==0.14"
+        },
         "idna": {
             "hashes": [
                 "sha256:2c6a5de3089009e3da7c5dde64a141dbc8551d5b7f6cf4ed7c2568d0cc520a8f",
@@ -623,6 +646,64 @@
             "index": "pypi",
             "version": "==2.6"
         },
+        "imagesize": {
+            "hashes": [
+                "sha256:3f349de3eb99145973fefb7dbe38554414e5c30abd0c8e4b970a7c9d09f3a1d8",
+                "sha256:f3832918bc3c66617f92e35f5d70729187676313caa60c187eb0f28b8fe5e3b5"
+            ],
+            "version": "==1.1.0"
+        },
+        "jinja2": {
+            "hashes": [
+                "sha256:74c935a1b8bb9a3947c50a54766a969d4846290e1e788ea44c1392163723c3bd",
+                "sha256:f84be1bb0040caca4cea721fcbbbbd61f9be9464ca236387158b0feea01914a4"
+            ],
+            "index": "pypi",
+            "version": "==2.10"
+        },
+        "markupsafe": {
+            "hashes": [
+                "sha256:a6be69091dac236ea9c6bc7d012beab42010fa914c459791d627dad4910eb665"
+            ],
+            "index": "pypi",
+            "version": "==1.0"
+        },
+        "packaging": {
+            "hashes": [
+                "sha256:e9215d2d2535d3ae866c3d6efc77d5b24a0192cce0ff20e42896cc0664f889c0",
+                "sha256:f019b770dd64e585a99714f1fd5e01c7a8f11b45635aa953fd41c689a657375b"
+            ],
+            "version": "==17.1"
+        },
+        "pygments": {
+            "hashes": [
+                "sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d",
+                "sha256:dbae1046def0efb574852fab9e90209b23f556367b5a320c0bcb871c77c3e8cc"
+            ],
+            "version": "==2.2.0"
+        },
+        "pyparsing": {
+            "hashes": [
+                "sha256:905d8090c335314568b5faee0025b1829f27bb974604a5762a6cdef3a7dfc3b7",
+                "sha256:f493ee323be1e94929416b3585eefcc04943115cecbaaa35a8c86d1a2368af19"
+            ],
+            "version": "==2.2.1"
+        },
+        "pytz": {
+            "hashes": [
+                "sha256:59707844a9825589878236ff2f4e0dc9958511b7ffaae94dc615da07d4a68d33",
+                "sha256:699d18a2a56f19ee5698ab1123bbcc1d269d061996aeb1eda6d89248d3542b82",
+                "sha256:80af0f3008046b9975242012a985f04c5df1f01eed4ec1633d56cc47a75a6a48",
+                "sha256:8cc90340159b5d7ced6f2ba77694d946fc975b09f1a51d93f3ce3bb399396f94",
+                "sha256:c41c62827ce9cafacd6f2f7018e4f83a6f1986e87bfd000b8cfbd4ab5da95f1a",
+                "sha256:d0ef5ef55ed3d37854320d4926b04a4cb42a2e88f71da9ddfdacfde8e364f027",
+                "sha256:dd2e4ca6ce3785c8dd342d1853dd9052b19290d5bf66060846e5dc6b8d6667f7",
+                "sha256:fae4cffc040921b8a2d60c6cf0b5d662c1190fe54d718271db4eb17d44a185b7",
+                "sha256:feb2365914948b8620347784b6b6da356f31c9d03560259070b2f30cff3d469d"
+            ],
+            "index": "pypi",
+            "version": "==2017.3"
+        },
         "requests": {
             "hashes": [
                 "sha256:6a1b267aa90cac58ac3a765d067950e7dbbf75b1da07e895d1f594193a40a38b",
@@ -631,6 +712,45 @@
             "index": "pypi",
             "version": "==2.18.4"
         },
+        "six": {
+            "hashes": [
+                "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9",
+                "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb"
+            ],
+            "version": "==1.11.0"
+        },
+        "snowballstemmer": {
+            "hashes": [
+                "sha256:919f26a68b2c17a7634da993d91339e288964f93c274f1343e3bbbe2096e1128",
+                "sha256:9f3bcd3c401c3e862ec0ebe6d2c069ebc012ce142cce209c098ccb5b09136e89"
+            ],
+            "index": "pypi",
+            "version": "==1.2.1"
+        },
+        "sphinx": {
+            "hashes": [
+                "sha256:95acd6648902333647a0e0564abdb28a74b0a76d2333148aa35e5ed1f56d3c4b",
+                "sha256:c091dbdd5cc5aac6eb95d591a819fd18bccec90ffb048ec465b165a48b839b45"
+            ],
+            "index": "pypi",
+            "version": "==1.8.0"
+        },
+        "sphinx-autodoc-typehints": {
+            "hashes": [
+                "sha256:1a9df6cb3ba72453ea4bfbe96ea887abc0d796b2ce9508c2189217a1bb69b366",
+                "sha256:46cc9e985ee6d8bbbd07fffd95b815c39a72df6afb600f59671f85f7340e7d0d"
+            ],
+            "index": "pypi",
+            "version": "==1.3.0"
+        },
+        "sphinxcontrib-websupport": {
+            "hashes": [
+                "sha256:68ca7ff70785cbe1e7bccc71a48b5b6d965d79ca50629606c7861a21b206d9dd",
+                "sha256:9de47f375baf1ea07cdb3436ff39d7a9c76042c10a769c52353ec46e4e8fc3b9"
+            ],
+            "index": "pypi",
+            "version": "==1.1.0"
+        },
         "urllib3": {
             "hashes": [
                 "sha256:06330f386d6e4b195fbfc736b297f58c5a892e4440e54d294d7004e3a9bbea1b",
diff --git a/docs/requirements.txt b/docs/requirements.txt
deleted file mode 100644
index 63a31812..00000000
--- a/docs/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-sphinx-autodoc-typehints==1.2.5
-sphinxcontrib-websupport==1.0.1
-Sphinx==1.7.0
diff --git a/docs/source/api/search.controllers.authors.rst b/docs/source/api/search.controllers.authors.rst
deleted file mode 100644
index 5015a130..00000000
--- a/docs/source/api/search.controllers.authors.rst
+++ /dev/null
@@ -1,20 +0,0 @@
-search.controllers.authors package
-==================================
-
-.. automodule:: search.controllers.authors
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-Submodules
-----------
-
-search.controllers.authors.forms module
----------------------------------------
-
-.. automodule:: search.controllers.authors.forms
-    :members:
-    :undoc-members:
-    :show-inheritance:
-
-
diff --git a/docs/source/architecture.rst b/docs/source/architecture.rst
index b6877770..4f3f8da5 100644
--- a/docs/source/architecture.rst
+++ b/docs/source/architecture.rst
@@ -3,6 +3,12 @@ Architectural overview
 
 Context
 =======
+The arXiv search system supports queries about arXiv papers both from human
+users and from API clients (via the arXiv API gateway). Most readers arrive
+at the search interface via a small search bar in the running header of
+arxiv.org pages, or by clicking on the name of an author on the abstract page
+or other listings.
+
 .. _figure-ng-search-context:
 
 .. figure:: _static/diagrams/ng-search-context.png
@@ -10,12 +16,6 @@ Context
 
    System context for arXiv search.
 
-The arXiv search system supports queries about arXiv papers both from human
-users and from API clients (via the arXiv API gateway). Most readers arrive
-at the search interface via a small search bar in the running header of
-arxiv.org pages, or by clicking on the name of an author on the abstract page
-or other listings.
-
 The search system draws content from the core metadata repository (currently
 the classic arXiv application, via the ``docmeta`` endpoint), and (future)
 from the fulltext extraction service.
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 763c44dc..5ab00a0c 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -181,4 +181,12 @@
 ]
 
 
-intersphinx_mapping = {'https://docs.python.org/3/': None}
+intersphinx_mapping = {
+    'python':  ('https://docs.python.org/3.6', None),
+    'arxitecture':  ('https://cul-it.github.io/arxiv-arxitecture/', None),
+    'arxiv.taxonomy': ('https://cul-it.github.io/arxiv-base', None),
+    'arxiv.base':  ('https://cul-it.github.io/arxiv-base', None),
+    'browse':  ('https://cul-it.github.io/arxiv-browse/', None),
+    'search':  ('https://cul-it.github.io/arxiv-search/', None),
+    'zero':  ('https://cul-it.github.io/arxiv-zero/', None),
+}
diff --git a/docs/source/search_api.rst b/docs/source/search_api.rst
index 91e81550..df5100d7 100644
--- a/docs/source/search_api.rst
+++ b/docs/source/search_api.rst
@@ -21,7 +21,7 @@ The current release supports only JSON serialization, provided by
 implemented.
 
 A formal description of the API (OpenAPI 3.0) and resources (JSON Schema) can
-be found at `https://github.com/cul-it/arxiv-search/tree/master/schema`_.
+be found at `https://github.com/cul-it/arxiv-search/tree/develop/schema`_.
 
 The service endpoints are defined in :mod:`search.routes.api`:
 

From 40f7e1608c1b5a801ec472ffc5d2d470d5e86d87 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Tue, 18 Sep 2018 15:48:15 -0400
Subject: [PATCH 18/77] updated readme

---
 README.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/README.md b/README.md
index 1a31e886..d297f528 100644
--- a/README.md
+++ b/README.md
@@ -87,6 +87,14 @@ manually restart to see those changes take effect.
 
 If all goes well... http://127.0.0.1:5000/ should render the basic search page.
 
+You can run the API in dev mode by changing `FLASK_APP` to point to ``api.py``,
+i.e.:
+
+```bash
+FLASK_APP=api.py FLASK_DEBUG=1 ELASTICSEARCH_HOST=127.0.0.1 pipenv run flask run
+```
+
+
 
 ## Running the indexing agent.
 

From 8f5f12f5c364a455d419ff35f3ba8003e6e63fe1 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Tue, 18 Sep 2018 15:55:46 -0400
Subject: [PATCH 19/77] minor update to docs

---
 docs/source/search_api.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/search_api.rst b/docs/source/search_api.rst
index df5100d7..693ef923 100644
--- a/docs/source/search_api.rst
+++ b/docs/source/search_api.rst
@@ -21,7 +21,7 @@ The current release supports only JSON serialization, provided by
 implemented.
 
 A formal description of the API (OpenAPI 3.0) and resources (JSON Schema) can
-be found at `https://github.com/cul-it/arxiv-search/tree/develop/schema`_.
+be found at `<https://github.com/cul-it/arxiv-search/tree/develop/schema>`_.
 
 The service endpoints are defined in :mod:`search.routes.api`:
 
@@ -32,5 +32,5 @@ The service endpoints are defined in :mod:`search.routes.api`:
 
 Requests are handled by the controllers in :mod:`search.controllers.api`. As of
 this writing, the :class:`search.domain.api.APIQuery` domain class is identical
-to :class:`search.domain.api.AdvancedQuery`, but this may change in future
+to :class:`search.domain.advanced.AdvancedQuery`, but this may change in future
 versions.

From d84995f0280231aa2039ff92f3b2e4d1cfb12c9e Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Thu, 27 Sep 2018 07:52:45 -0400
Subject: [PATCH 20/77] ARXIVNG-1206 added support for configurable return
 fields; query by URI.

---
 schema/resources/Document.json     |  21 +++---
 schema/search.yaml                 |  12 +++
 search/controllers/api/__init__.py |  19 ++++-
 search/domain/api.py               |  19 ++++-
 search/domain/base.py              |   1 +
 search/routes/api/__init__.py      |   4 +-
 search/routes/api/serialize.py     | 117 +++++++++++++++++------------
 search/services/index/__init__.py  |   5 ++
 search/services/index/prepare.py   |  25 +++++-
 search/services/index/results.py   |   1 -
 10 files changed, 155 insertions(+), 69 deletions(-)

diff --git a/schema/resources/Document.json b/schema/resources/Document.json
index 3120a19c..569f24af 100644
--- a/schema/resources/Document.json
+++ b/schema/resources/Document.json
@@ -92,6 +92,10 @@
       "type": "string"
     },
     "paper_id": {
+      "description": "arXiv paper identifier without version affix.",
+      "type": "string"
+    },
+    "paper_id_v": {
       "description": "arXiv paper identifier with version affix.",
       "type": "string"
     },
@@ -115,6 +119,10 @@
         }
       }
     },
+    "href": {
+      "type": "string",
+      "format": "uri"
+    },
     "fulltext": {
       "type": "string"
     },
@@ -205,16 +213,9 @@
     }
   },
   "required": [
-    "abstract",
-    "authors",
-    "submitted_date",
-    "is_current",
-    "is_withdrawn",
-    "license",
     "paper_id",
-    "primary_classification",
-    "title",
-    "source",
-    "version"
+    "paper_id_v",
+    "version",
+    "href"
   ]
 }
diff --git a/schema/search.yaml b/schema/search.yaml
index 6c1f49ba..73b15c4b 100644
--- a/schema/search.yaml
+++ b/schema/search.yaml
@@ -41,6 +41,18 @@ paths:
             type: array
             items:
               type: string
+        - name: include
+          in: query
+          description: |
+            Fields to include in the result set, in addition to paper ID and
+            version.
+          required: false
+          style: form
+          explode: true
+          schema:
+            type: array
+            items:
+              type: string
       responses:
         '200':
           description: All arXiv papers that respond to specified query.
diff --git a/search/controllers/api/__init__.py b/search/controllers/api/__init__.py
index 768839e6..4c361d34 100644
--- a/search/controllers/api/__init__.py
+++ b/search/controllers/api/__init__.py
@@ -1,6 +1,6 @@
 """Controller for search API requests."""
 
-from typing import Tuple, Dict, Any, Optional
+from typing import Tuple, Dict, Any, Optional, List
 import re
 from datetime import date, datetime
 from dateutil.relativedelta import relativedelta
@@ -56,10 +56,14 @@ def search(params: MultiDict) -> Tuple[DocumentSet, int, Dict[str, Any]]:
     if classifications is not None:
         q.primary_classification = classifications
 
+    include_fields = _get_include_fields(params)
+    if include_fields:
+        q.include_fields += include_fields
+
     q = paginate(q, params)     # type: ignore
     document_set = index.search(q, highlight=False)
     logger.debug('Got document set with %i results', len(document_set.results))
-    return document_set, status.HTTP_200_OK, {}
+    return {'results': document_set, 'query': q}, status.HTTP_200_OK, {}
 
 
 def paper(paper_id: str) -> Tuple[Document, int, Dict[str, Any]]:
@@ -91,7 +95,16 @@ def paper(paper_id: str) -> Tuple[Document, int, Dict[str, Any]]:
     except index.DocumentNotFound as e:
         logger.error('Document not found')
         raise NotFound('No such document') from e
-    return document, status.HTTP_200_OK, {}
+    return {'results': document}, status.HTTP_200_OK, {}
+
+
+def _get_include_fields(params: MultiDict) -> List[str]:
+    include_fields = params.getlist('include')
+    allowed_fields = Document.fields()
+    if include_fields:
+        print(include_fields)
+        return [field for field in include_fields if field in allowed_fields]
+    return []
 
 
 def _get_fielded_terms(params: MultiDict) -> Optional[FieldedSearchList]:
diff --git a/search/domain/api.py b/search/domain/api.py
index 9de39eb1..e25274e2 100644
--- a/search/domain/api.py
+++ b/search/domain/api.py
@@ -1,12 +1,20 @@
 """API-specific domain classes."""
 
-from .base import DateRange, Query, ClassificationList
+from .base import DateRange, Query, ClassificationList, List
 from .advanced import FieldedSearchList, FieldedSearchTerm
 
 from dataclasses import dataclass, field
 from typing import NamedTuple, Optional
 
 
+def get_default_extra_fields() -> List[str]:
+    return ['title']
+
+
+def get_required_fields() -> List[str]:
+    return ['paper_id', 'paper_id_v', 'version']
+
+
 @dataclass
 class APIQuery(Query):
     """
@@ -14,9 +22,16 @@ class APIQuery(Query):
 
     Similar to an advanced query.
     """
-
     date_range: Optional[DateRange] = None
     primary_classification: ClassificationList = field(
         default_factory=ClassificationList
     )
     terms: FieldedSearchList = field(default_factory=FieldedSearchList)
+
+    include_fields: List[str] = field(default_factory=get_default_extra_fields)
+
+    def __post_init__(self) -> None:
+        self.include_fields = list(
+            set(get_required_fields() + self.include_fields)
+        )
+        print(self.include_fields)
diff --git a/search/domain/base.py b/search/domain/base.py
index a41f39c7..7ba8e627 100644
--- a/search/domain/base.py
+++ b/search/domain/base.py
@@ -165,6 +165,7 @@ class Query:
         ('paper_id', 'arXiv identifier'),
         ('doi', 'DOI'),
         ('orcid', 'ORCID'),
+        ('license', 'License (URI)'),
         ('author_id', 'arXiv author ID'),
         ('help', 'Help pages'),
         ('full_text', 'Full text')
diff --git a/search/routes/api/__init__.py b/search/routes/api/__init__.py
index c417bd03..16f40d10 100644
--- a/search/routes/api/__init__.py
+++ b/search/routes/api/__init__.py
@@ -38,7 +38,7 @@ def search() -> Response:
     # requested = request.accept_mimetypes.best_match([JSON, ATOM_XML])
     # if requested == ATOM_XML:
     #     return serialize.as_atom(data), status, headers
-    return serialize.as_json(data), status_code, headers
+    return serialize.as_json(data['results'], query=data['query']), status_code, headers
 
 
 @blueprint.route('<arxiv:paper_id>v<string:version>', methods=['GET'])
@@ -46,4 +46,4 @@ def search() -> Response:
 def paper(paper_id: str, version: str) -> Response:
     """Document metadata endpoint."""
     data, status_code, headers = api.paper(f'{paper_id}v{version}')
-    return serialize.as_json(data), status_code, headers
+    return serialize.as_json(data['results']), status_code, headers
diff --git a/search/routes/api/serialize.py b/search/routes/api/serialize.py
index 05d2f395..8ccd56d1 100644
--- a/search/routes/api/serialize.py
+++ b/search/routes/api/serialize.py
@@ -1,11 +1,12 @@
 """Serializers for API responses."""
 
-from typing import Union
+from typing import Union, Optional
 from lxml import etree
 from flask import jsonify, url_for
 
 from arxiv import status
-from search.domain import DocumentSet, Document, Classification, Person
+from search.domain import DocumentSet, Document, Classification, Person, \
+    APIQuery
 
 
 class BaseSerializer(object):
@@ -28,7 +29,9 @@ def _transform_person(cls, person: Person) -> dict:
         }
 
     @classmethod
-    def _transform_classification(cls, clsn: Classification) -> dict:
+    def _transform_classification(cls, clsn: Classification) -> Optional[dict]:
+        if clsn.category is None:
+            return None
         return {
             'group': clsn.group,
             'archive': clsn.archive,
@@ -43,7 +46,9 @@ def _transform_format(cls, fmt: str, paper_id: str, version: int) -> dict:
         }
 
     @classmethod
-    def _transform_latest(cls, document: Document) -> dict:
+    def _transform_latest(cls, document: Document) -> Optional[dict]:
+        if not document.latest:
+            return None
         return {
             "paper_id": document.latest,
             "href": url_for("api.paper", paper_id=document.paper_id,
@@ -60,63 +65,77 @@ def _transform_license(cls, license: dict) -> dict:
         }
 
     @classmethod
-    def transform_document(cls, doc: Document) -> dict:
+    def transform_document(cls, doc: Document,
+                           query: Optional[APIQuery] = None) -> dict:
         """Select a subset of :class:`Document` properties for public API."""
-        return {
-            'abs_categories': doc.abs_categories,
-            'abstract': doc.abstract,
-            'acm_class': doc.acm_class,
-            'owners': [
+        fields = [
+            ('abs_categories', doc.abs_categories),
+            ('abstract', doc.abstract),
+            ('acm_class', doc.acm_class),
+            ('owners', [
                 cls._transform_person(owner) for owner in doc.owners
                 if owner is not None
-            ],
-            'authors': [
+            ]),
+            ('authors', [
                 cls._transform_person(author) for author in doc.authors
                 if author is not None
-            ],
-            'comments': doc.comments,
-            'submitted_date': doc.submitted_date,
-            'submitted_date_first': doc.submitted_date_first,
-            'announced_date_first': (
+            ]),
+            ('comments', doc.comments),
+            ('authors_freeform', doc.authors_freeform),
+            ('submitted_date', doc.submitted_date),
+            ('submitted_date_first', doc.submitted_date_first),
+            ('announced_date_first', (
                 doc.announced_date_first.strftime('%Y-%m')
                 if doc.announced_date_first is not None
                 else None
-            ),
-            'paper_id': doc.paper_id_v,
-            'doi': doc.doi,
-            'formats': [
+            )),
+            ('paper_id', doc.paper_id),
+            ('paper_id_v', doc.paper_id_v),
+            ('doi', doc.doi),
+            ('formats', [
                 cls._transform_format(fmt, doc.paper_id, doc.version)
                 for fmt in doc.formats
-            ],
-            'is_current': doc.is_current,
-            'is_withdrawn': doc.is_withdrawn,
-            'journal_ref': doc.journal_ref,
-            'license': cls._transform_license(doc.license),
-            'msc_class': doc.msc_class,
-            'primary_classification': cls._transform_classification(
-                doc.primary_classification
-            ),
-            'secondary_classification': [
+            ]),
+            ('is_current', doc.is_current),
+            ('is_withdrawn', doc.is_withdrawn),
+            ('journal_ref', doc.journal_ref),
+            ('license',
+             cls._transform_license(doc.license) if doc.license else None),
+            ('msc_class', doc.msc_class),
+            ('primary_classification',
+             cls._transform_classification(doc.primary_classification)
+             if doc.primary_classification else None),
+            ('secondary_classification', [
                 cls._transform_classification(clsn)
                 for clsn in doc.secondary_classification
-            ],
-            'report_num': doc.report_num,
-            'source': doc.source,  # TODO: link?
-            'submitter': (
+            ]),
+            ('report_num', doc.report_num),
+            ('source', doc.source),  # TODO, link?
+            ('submitter', (
                 cls._transform_person(doc.submitter)
                 if doc.submitter is not None else None
-            ),
-            'title': doc.title,
-            'version': doc.version,
-            'latest': cls._transform_latest(doc),
-        }
+            )),
+            ('title', doc.title),
+            ('version', doc.version),
+            ('latest', cls._transform_latest(doc))
+        ]
+
+        # Only return fields that have been explicitly requested.
+        if query is not None:
+            _data = {field: value for field, value in fields
+                     if field in query.include_fields}
+            _data['href'] = url_for("api.paper", paper_id=doc.paper_id,
+                                    version=doc.version, _external=True)
+            return _data
+        return {field: value for field, value in fields}
 
     @classmethod
-    def serialize(cls, document_set: DocumentSet) -> str:
+    def serialize(cls, document_set: DocumentSet,
+                  query: Optional[APIQuery] = None) -> str:
         """Generate JSON for a :class:`DocumentSet`."""
         serialized: str = jsonify({
             'results': [
-                cls.transform_document(doc)
+                cls.transform_document(doc, query=query)
                 for doc in document_set.results
             ],
             'metadata': {
@@ -129,17 +148,21 @@ def serialize(cls, document_set: DocumentSet) -> str:
         return serialized
 
     @classmethod
-    def serialize_document(cls, document: Document) -> str:
+    def serialize_document(cls, document: Document,
+                           query: Optional[APIQuery] = None) -> str:
         """Generate JSON for a single :class:`Document`."""
-        serialized: str = jsonify(cls.transform_document(document))
+        serialized: str = jsonify(
+            cls.transform_document(document, query=query)
+        )
         return serialized
 
 
-def as_json(document_or_set: Union[DocumentSet, Document]) -> str:
+def as_json(document_or_set: Union[DocumentSet, Document],
+            query: Optional[APIQuery] = None) -> str:
     """Serialize a :class:`DocumentSet` as JSON."""
     if type(document_or_set) is DocumentSet:
-        return JSONSerializer.serialize(document_or_set)  # type: ignore
-    return JSONSerializer.serialize_document(document_or_set)  # type: ignore
+        return JSONSerializer.serialize(document_or_set, query=query)  # type: ignore
+    return JSONSerializer.serialize_document(document_or_set, query=query)  # type: ignore
 
 
 # TODO: implement me!
diff --git a/search/services/index/__init__.py b/search/services/index/__init__.py
index 3d355832..ff101adc 100644
--- a/search/services/index/__init__.py
+++ b/search/services/index/__init__.py
@@ -407,6 +407,11 @@ def search(self, query: Query, highlight: bool = True) -> DocumentSet:
             # fields and configuration for highlighting.
             current_search = highlighting.highlight(current_search)
 
+        if hasattr(query, 'include_fields'):
+            current_search = current_search.extra(
+                _source={'include': query.include_fields}
+            )
+
         with handle_es_exceptions():
             # Slicing the search adds pagination parameters to the request.
             resp = current_search[query.page_start:query.page_end].execute()
diff --git a/search/services/index/prepare.py b/search/services/index/prepare.py
index faa2b5aa..4f1fa313 100644
--- a/search/services/index/prepare.py
+++ b/search/services/index/prepare.py
@@ -142,6 +142,11 @@ def _query_paper_id(term: str, operator: str = 'and') -> Q:
     return q
 
 
+def _license_query(term: str, operator: str = 'and') -> Q:
+    """Search by license, using its URI (exact)."""
+    return Q('term', **{'license__uri': term})
+
+
 def _query_combined(term: str) -> Q:
     # Only wildcards in literals should be escaped.
     wildcard_escaped, has_wildcard = wildcard_escape(term)
@@ -331,22 +336,33 @@ def _to_q(classification: Classification) -> Q:
         _qs = []
         if classification.group:
             _qs.append(
-                Q("match", **{"primary_classification__group__id": {"query": classification.group}})
+                Q("match", **{
+                    "primary_classification__group__id": {
+                        "query": classification.group
+                    }
+                })
             )
         if classification.archive:
             _qs.append(
-                Q("match", **{"primary_classification__archive__id": {"query": classification.archive}})
+                Q("match", **{
+                    "primary_classification__archive__id": {
+                        "query": classification.archive
+                    }
+                })
             )
         if classification.category:
             _qs.append(
-                Q("match", **{"primary_classification__category__id": {"query": classification.category}})
+                Q("match", **{
+                    "primary_classification__category__id": {
+                        "query": classification.category
+                    }
+                })
             )
         return reduce(iand, _qs)
 
     return reduce(ior, [_to_q(clsn) for clsn in classifications])
 
 
-
 SEARCH_FIELDS: Dict[str, Callable[[str], Q]] = dict([
     ('author', author_query),
     ('title', _query_title),
@@ -360,5 +376,6 @@ def _to_q(classification: Classification) -> Q:
     ('paper_id', _query_paper_id),
     ('orcid', orcid_query),
     ('author_id', author_id_query),
+    ('license', _license_query),
     ('all', _query_all_fields)
 ])
diff --git a/search/services/index/results.py b/search/services/index/results.py
index d6cabc2a..bbc16606 100644
--- a/search/services/index/results.py
+++ b/search/services/index/results.py
@@ -43,7 +43,6 @@ def to_document(raw: Union[Hit, dict], highlight: bool = True) -> Document:
     result['match'] = {}  # Hit on field, but no highlighting.
     result['truncated'] = {}    # Preview is truncated.
 
-    logger.debug('Raw data is a %s instance', type(raw))
     for key in Document.fields():
         if type(raw) is Hit:
             if not hasattr(raw, key):

From dc4f295c6a14c39ce50157f87479abbc02361070 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Thu, 27 Sep 2018 08:18:09 -0400
Subject: [PATCH 21/77] cleanup, test fixes

---
 Pipfile                             |  2 +-
 Pipfile.lock                        | 22 ++++----
 search/domain/api.py                |  8 ++-
 search/routes/api/__init__.py       |  3 +-
 search/routes/api/serialize.py      | 11 ++--
 search/routes/api/tests/test_api.py | 88 ++++++++++++++++++++++++++++-
 6 files changed, 111 insertions(+), 23 deletions(-)

diff --git a/Pipfile b/Pipfile
index d4a37c0e..fe3ff4c6 100644
--- a/Pipfile
+++ b/Pipfile
@@ -4,7 +4,7 @@ verify_ssl = true
 name = "pypi"
 
 [packages]
-arxiv-base = "==0.11.1rc5"
+arxiv-base = "==0.11.1rc6"
 arxiv-auth = "==0.1.0rc14"
 boto = "==2.48.0"
 "boto3" = "==1.6.6"
diff --git a/Pipfile.lock b/Pipfile.lock
index cb035726..1ea189d1 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "f14dcfd9ec69893845bb7478b52f195e7afe2dffc3c667d277e87afc12ec5dd3"
+            "sha256": "d1b1281c8c786c7f3ca24513df4fea25993b9b4bb08fcf0c66586d6455701ac4"
         },
         "pipfile-spec": 6,
         "requires": {},
@@ -23,10 +23,10 @@
         },
         "arxiv-base": {
             "hashes": [
-                "sha256:e8f8504a4b8ca5a190a3572882cd1cfd544b071cc2deec1e62d90635787da66f"
+                "sha256:2c3eb57d1cab3af774b72b2ea0cd673d0e67d78f41dbbc11b6ee648c38bb92c9"
             ],
             "index": "pypi",
-            "version": "==0.11.1rc5"
+            "version": "==0.11.1rc6"
         },
         "bleach": {
             "hashes": [
@@ -453,9 +453,9 @@
         },
         "sqlalchemy": {
             "hashes": [
-                "sha256:ef6569ad403520ee13e180e1bfd6ed71a0254192a934ec1dbd3dbf48f4aa9524"
+                "sha256:c5951d9ef1d5404ed04bae5a16b60a0779087378928f997a294d1229c6ca4d3e"
             ],
-            "version": "==1.2.11"
+            "version": "==1.2.12"
         },
         "thrift": {
             "hashes": [
@@ -670,10 +670,10 @@
         },
         "packaging": {
             "hashes": [
-                "sha256:e9215d2d2535d3ae866c3d6efc77d5b24a0192cce0ff20e42896cc0664f889c0",
-                "sha256:f019b770dd64e585a99714f1fd5e01c7a8f11b45635aa953fd41c689a657375b"
+                "sha256:0886227f54515e592aaa2e5a553332c73962917f2831f1b0f9b9f4380a4b9807",
+                "sha256:f95a1e147590f204328170981833854229bb2912ac3d5f89e2a8ccd2834800c9"
             ],
-            "version": "==17.1"
+            "version": "==18.0"
         },
         "pygments": {
             "hashes": [
@@ -729,11 +729,11 @@
         },
         "sphinx": {
             "hashes": [
-                "sha256:95acd6648902333647a0e0564abdb28a74b0a76d2333148aa35e5ed1f56d3c4b",
-                "sha256:c091dbdd5cc5aac6eb95d591a819fd18bccec90ffb048ec465b165a48b839b45"
+                "sha256:652eb8c566f18823a022bb4b6dbc868d366df332a11a0226b5bc3a798a479f17",
+                "sha256:d222626d8356de702431e813a05c68a35967e3d66c6cd1c2c89539bb179a7464"
             ],
             "index": "pypi",
-            "version": "==1.8.0"
+            "version": "==1.8.1"
         },
         "sphinx-autodoc-typehints": {
             "hashes": [
diff --git a/search/domain/api.py b/search/domain/api.py
index e25274e2..e806cbc2 100644
--- a/search/domain/api.py
+++ b/search/domain/api.py
@@ -8,11 +8,13 @@
 
 
 def get_default_extra_fields() -> List[str]:
+    """These are the default extra fields."""
     return ['title']
 
 
 def get_required_fields() -> List[str]:
-    return ['paper_id', 'paper_id_v', 'version']
+    """These fields should always be included."""
+    return ['paper_id', 'paper_id_v', 'version', 'href']
 
 
 @dataclass
@@ -22,16 +24,16 @@ class APIQuery(Query):
 
     Similar to an advanced query.
     """
+
     date_range: Optional[DateRange] = None
     primary_classification: ClassificationList = field(
         default_factory=ClassificationList
     )
     terms: FieldedSearchList = field(default_factory=FieldedSearchList)
-
     include_fields: List[str] = field(default_factory=get_default_extra_fields)
 
     def __post_init__(self) -> None:
+        """Be sure that the required fields are present in include_fields."""
         self.include_fields = list(
             set(get_required_fields() + self.include_fields)
         )
-        print(self.include_fields)
diff --git a/search/routes/api/__init__.py b/search/routes/api/__init__.py
index 16f40d10..a2dc6fb5 100644
--- a/search/routes/api/__init__.py
+++ b/search/routes/api/__init__.py
@@ -38,7 +38,8 @@ def search() -> Response:
     # requested = request.accept_mimetypes.best_match([JSON, ATOM_XML])
     # if requested == ATOM_XML:
     #     return serialize.as_atom(data), status, headers
-    return serialize.as_json(data['results'], query=data['query']), status_code, headers
+    response_data = serialize.as_json(data['results'], query=data['query'])
+    return response_data, status_code, headers
 
 
 @blueprint.route('<arxiv:paper_id>v<string:version>', methods=['GET'])
diff --git a/search/routes/api/serialize.py b/search/routes/api/serialize.py
index 8ccd56d1..60eca787 100644
--- a/search/routes/api/serialize.py
+++ b/search/routes/api/serialize.py
@@ -117,17 +117,18 @@ def transform_document(cls, doc: Document,
             )),
             ('title', doc.title),
             ('version', doc.version),
-            ('latest', cls._transform_latest(doc))
+            ('latest', cls._transform_latest(doc)),
+            ('href', url_for("api.paper", paper_id=doc.paper_id,
+                             version=doc.version, _external=True))
         ]
 
         # Only return fields that have been explicitly requested.
         if query is not None:
             _data = {field: value for field, value in fields
                      if field in query.include_fields}
-            _data['href'] = url_for("api.paper", paper_id=doc.paper_id,
-                                    version=doc.version, _external=True)
-            return _data
-        return {field: value for field, value in fields}
+        else:
+            _data = {field: value for field, value in fields}
+        return _data
 
     @classmethod
     def serialize(cls, document_set: DocumentSet,
diff --git a/search/routes/api/tests/test_api.py b/search/routes/api/tests/test_api.py
index 1d5b1f9c..1f1c47f7 100644
--- a/search/routes/api/tests/test_api.py
+++ b/search/routes/api/tests/test_api.py
@@ -44,7 +44,7 @@ def test_with_token_lacking_scope(self):
         self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
 
     @mock.patch(f'{factory.__name__}.api.api')
-    def test_with_valid_token(self, mock_api_controller):
+    def test_with_valid_token(self, mock_controller):
         """Client auth token has required public read scope."""
         document = domain.Document(
             submitted_date=datetime.now(),
@@ -100,7 +100,8 @@ def test_with_valid_token(self, mock_api_controller):
             results=[document],
             metadata={'start': 0, 'end': 1, 'size': 50, 'total': 1}
         )
-        mock_api_controller.search.return_value = docs, status.HTTP_200_OK, {}
+        r_data = {'results': docs, 'query': domain.APIQuery()}
+        mock_controller.search.return_value = r_data, status.HTTP_200_OK, {}
         token = helpers.generate_token('1234', 'foo@bar.com', 'foouser',
                                        scope=[auth.scopes.READ_PUBLIC])
         response = self.client.get('/', headers={'Authorization': token})
@@ -113,3 +114,86 @@ def test_with_valid_token(self, mock_api_controller):
         )
         self.assertIsNone(jsonschema.validate(data, self.schema, resolver=res),
                           'Response content is valid per schema')
+
+        for field in domain.api.get_required_fields():
+            self.assertIn(field, data['results'][0])
+
+    @mock.patch(f'{factory.__name__}.api.api')
+    def test_with_valid_token_limit_fields(self, mock_controller):
+        """Results contain only the fields listed in the query."""
+        document = domain.Document(
+            submitted_date=datetime.now(),
+            submitted_date_first=datetime.now(),
+            announced_date_first=datetime.now(),
+            id='1234.5678',
+            abstract='very abstract',
+            authors=[
+                domain.Person(full_name='F. Bar', orcid='1234-5678-9012-3456')
+            ],
+            submitter=domain.Person(full_name='S. Ubmitter', author_id='su_1'),
+            modified_date=datetime.now(),
+            updated_date=datetime.now(),
+            is_current=True,
+            is_withdrawn=False,
+            license={
+                'uri': 'http://foo.license/1',
+                'label': 'Notalicense 5.4'
+            },
+            paper_id='1234.5678',
+            paper_id_v='1234.5678v6',
+            title='tiiiitle',
+            source={
+                'flags': 'A',
+                'format': 'pdftotex',
+                'size_bytes': 2
+            },
+            version=6,
+            latest='1234.5678v6',
+            latest_version=6,
+            report_num='somenum1',
+            msc_class=['c1'],
+            acm_class=['z2'],
+            journal_ref='somejournal (1991): 2-34',
+            doi='10.123456/7890',
+            comments='very science',
+            abs_categories='astro-ph.CO foo.BR',
+            formats=['pdf', 'other'],
+            primary_classification=domain.Classification(
+                group={'id': 'foo', 'name': 'Foo Group'},
+                archive={'id': 'foo', 'name': 'Foo Archive'},
+                category={'id': 'foo.BR', 'name': 'Foo Category'},
+            ),
+            secondary_classification=[
+                domain.Classification(
+                    group={'id': 'foo', 'name': 'Foo Group'},
+                    archive={'id': 'foo', 'name': 'Foo Archive'},
+                    category={'id': 'foo.BZ', 'name': 'Baz Category'},
+                )
+            ]
+        )
+        docs = domain.DocumentSet(
+            results=[document],
+            metadata={'start': 0, 'end': 1, 'size': 50, 'total': 1}
+        )
+
+        query = domain.APIQuery(include_fields=['abstract', 'license'])
+        r_data = {'results': docs, 'query': query}
+        mock_controller.search.return_value = r_data, status.HTTP_200_OK, {}
+        token = helpers.generate_token('1234', 'foo@bar.com', 'foouser',
+                                       scope=[auth.scopes.READ_PUBLIC])
+        response = self.client.get('/', headers={'Authorization': token})
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+
+        data = json.loads(response.data)
+        res = jsonschema.RefResolver(
+            'file://%s/' % os.path.abspath(os.path.dirname(self.SCHEMA_PATH)),
+            None
+        )
+        self.assertIsNone(jsonschema.validate(data, self.schema, resolver=res),
+                          'Response content is valid per schema')
+
+        for field in domain.api.get_required_fields():
+            self.assertEqual(
+                set(data['results'][0].keys()),
+                set(query.include_fields)
+            )

From ea982997abe9723a38c044ec6c7cfd33768af288 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Thu, 27 Sep 2018 08:30:10 -0400
Subject: [PATCH 22/77] fixed mypy errors

---
 search/controllers/api/__init__.py | 4 ++--
 search/services/index/__init__.py  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/search/controllers/api/__init__.py b/search/controllers/api/__init__.py
index 4c361d34..5ba7f1da 100644
--- a/search/controllers/api/__init__.py
+++ b/search/controllers/api/__init__.py
@@ -26,7 +26,7 @@
 EASTERN = timezone('US/Eastern')
 
 
-def search(params: MultiDict) -> Tuple[DocumentSet, int, Dict[str, Any]]:
+def search(params: MultiDict) -> Tuple[Dict[str, Any], int, Dict[str, Any]]:
     """
     Handle a search request from the API.
 
@@ -66,7 +66,7 @@ def search(params: MultiDict) -> Tuple[DocumentSet, int, Dict[str, Any]]:
     return {'results': document_set, 'query': q}, status.HTTP_200_OK, {}
 
 
-def paper(paper_id: str) -> Tuple[Document, int, Dict[str, Any]]:
+def paper(paper_id: str) -> Tuple[Dict[str, Any], int, Dict[str, Any]]:
     """
     Handle a request for paper metadata from the API.
 
diff --git a/search/services/index/__init__.py b/search/services/index/__init__.py
index ff101adc..a7d7e664 100644
--- a/search/services/index/__init__.py
+++ b/search/services/index/__init__.py
@@ -407,7 +407,7 @@ def search(self, query: Query, highlight: bool = True) -> DocumentSet:
             # fields and configuration for highlighting.
             current_search = highlighting.highlight(current_search)
 
-        if hasattr(query, 'include_fields'):
+        if isinstance(query, APIQuery):
             current_search = current_search.extra(
                 _source={'include': query.include_fields}
             )

From fceaa7f33529428e8c05a321ac0ef5a4c9bf0483 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Fri, 28 Sep 2018 06:43:40 -0400
Subject: [PATCH 23/77] updated openapi spec

---
 docs/source/search_api.rst |   6 +-
 schema/search.yaml         | 155 +++++++++++++++++++++++++++++++++++--
 2 files changed, 150 insertions(+), 11 deletions(-)

diff --git a/docs/source/search_api.rst b/docs/source/search_api.rst
index 693ef923..a967a595 100644
--- a/docs/source/search_api.rst
+++ b/docs/source/search_api.rst
@@ -30,7 +30,5 @@ The service endpoints are defined in :mod:`search.routes.api`:
 - The paper metadata endpoint :func:`search.routes.api.paper` provides more
   detailed metadata for a specific arXiv e-print.
 
-Requests are handled by the controllers in :mod:`search.controllers.api`. As of
-this writing, the :class:`search.domain.api.APIQuery` domain class is identical
-to :class:`search.domain.advanced.AdvancedQuery`, but this may change in future
-versions.
+Requests are handled by the controllers in :mod:`search.controllers.api`, using
+the :class:`search.domain.api.APIQuery` domain class.
diff --git a/schema/search.yaml b/schema/search.yaml
index 73b15c4b..d691b46e 100644
--- a/schema/search.yaml
+++ b/schema/search.yaml
@@ -1,16 +1,18 @@
 openapi: "3.0.0"
 info:
-  version: "0.1"
+  version: "0.1.2"
   title: "arXiv Search API"
-  description: "A RESTful API for arXiv metadata."
-  termsOfService: "https://arxiv.org/help/general",
+  description: |
+    A query API for arXiv paper metadata.
+  termsOfService: "https://arxiv.org/help/general"
   contact:
     name: "arXiv API Team"
     email: nextgen@arxiv.org
   license:
     name: MIT
 servers:
-  - url: https://arxiv.org/api
+  - url: https://api.arxiv.org/metadata/
+    description: Metadata API endpoint.
 paths:
   /:
     get:
@@ -23,7 +25,142 @@ paths:
           in: query
           description: |
             Performs a query across all fields. Has the same behavior as the
-            simple search function on the main arXiv website.
+            simple search function on the main arXiv website. Supports quoted
+            literals, wildcards, etc.
+          required: false
+          style: form
+          explode: true
+          schema:
+            type: string
+        - name: author
+          in: query
+          description: |
+            Search by author name. For the most precise name search, use
+            ``surname(s), forename(s)`` or ``surname(s), initial(s)``. Author
+            names enclosed in quotes will return exact matches only.
+            Diacritic character variants are automatically searched. For
+            more information about the limitations of searching by author name,
+            see https://blogs.cornell.edu/arxiv/2018/05/04/release-search-v0-2-some-notes-on-names/.
+          required: false
+          style: form
+          explode: true
+          schema:
+            type: string
+        - name: title
+          in: query
+          description: |
+            Text search in paper titles. Supports quoted literals and
+            wildcards.
+          required: false
+          style: form
+          explode: true
+          schema:
+            type: string
+        - name: abstract
+          in: query
+          description: |
+            Text search in paper abstracts. Supports quoted literals and
+            wildcards.
+          required: false
+          style: form
+          explode: true
+          schema:
+            type: string
+        - name: comments
+          in: query
+          description: |
+            Text search in paper comments. Supports quoted literals and
+            wildcards. See https://arxiv.org/help/prep#comments for a
+            description of this field.
+          required: false
+          style: form
+          explode: true
+          schema:
+            type: string
+        - name: journal_ref
+          in: query
+          description: |
+            Text search in journal reference. Supports quoted literals and
+            wildcards. See https://arxiv.org/help/prep#journal for a
+            description of this field.
+          required: false
+          style: form
+          explode: true
+          schema:
+            type: string
+        - name: report_num
+          in: query
+          description: |
+            Text search in report number. Supports quoted literals and
+            wildcards. See https://arxiv.org/help/prep#report for a description
+            of this field.
+          required: false
+          style: form
+          explode: true
+          schema:
+            type: string
+        - name: acm_class
+          in: query
+          description: |
+            Keyword match on ACM classification code. Supports wildcards. See
+            https://arxiv.org/help/prep#acm for a description of this field.
+          required: false
+          style: form
+          explode: true
+          schema:
+            type: string
+        - name: msc_class
+          in: query
+          description: |
+            Keyword match on MSC classification code. Supports wildcards. See
+            https://arxiv.org/help/prep#msc for a description of this field.
+          required: false
+          style: form
+          explode: true
+          schema:
+            type: string
+        - name: doi
+          in: query
+          description: |
+            Keyword match on DOI. Supports wildcards. See
+            https://arxiv.org/help/prep#doi for a description of this field.
+          required: false
+          style: form
+          explode: true
+          schema:
+            type: string
+        - name: paper_id
+          in: query
+          description: |
+            Keyword match on arXiv paper ID, with or without a version affix.
+            Supports wildcards. See https://arxiv.org/help/arxiv_identifier for
+            information about arXiv paper identifiers.
+          required: false
+          style: form
+          explode: true
+          schema:
+            type: string
+        - name: orcid
+          in: query
+          description: |
+            Match on author ORCID ID. For information about ORCID IDs,
+            see https://arxiv.org/help/orcid. Note that ORCID IDs are only
+            available for the submitter and any author-owners who have claimed
+            the paper. See also
+            https://blogs.cornell.edu/arxiv/2018/05/04/release-search-v0-2-some-notes-on-names/.
+          required: false
+          style: form
+          explode: true
+          schema:
+            type: string
+        - name: author_id
+          in: query
+          description: |
+            Match on arXiv author identifier. For more information about author
+            identifiers, see https://arxiv.org/help/author_identifiers. Note
+            that author identifiers are only available for the submitter and
+            any author-owners who have claimed the paper. See also
+            https://blogs.cornell.edu/arxiv/2018/05/04/release-search-v0-2-some-notes-on-names/.
           required: false
           style: form
           explode: true
@@ -33,7 +170,8 @@ paths:
           in: query
           description: |
             Slug for the primary category or categories to which results
-            should be limited.
+            should be limited. Valid categories are defined at
+            https://github.com/cul-it/arxiv-base/blob/master/arxiv/taxonomy/__init__.py#L306.
           required: false
           style: form
           explode: true
@@ -68,7 +206,10 @@ paths:
                 $ref: '#/components/schemas/Error'
   /{id}:
     get:
-      description: Return metadata about an arXiv paper by arXiv ID.
+      description: |
+        Get metadata about an arXiv paper by arXiv ID. See
+        https://arxiv.org/help/arxiv_identifier for information about arXiv
+        paper identifiers.
       operationId: getPaperByID
       parameters:
         - name: id

From e6ac1c3a0c3e36f5bf5eaa711991ba539217d3ad Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Fri, 28 Sep 2018 07:09:53 -0400
Subject: [PATCH 24/77] minor

---
 schema/search.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/schema/search.yaml b/schema/search.yaml
index d691b46e..339299d9 100644
--- a/schema/search.yaml
+++ b/schema/search.yaml
@@ -183,7 +183,8 @@ paths:
           in: query
           description: |
             Fields to include in the result set, in addition to paper ID and
-            version.
+            version. See ``DocumentMetadata.json`` specification for all
+            available fields.
           required: false
           style: form
           explode: true

From 7cccd8a7ff0aa3022346aed0212bf35f7408bd22 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Fri, 28 Sep 2018 11:10:31 -0400
Subject: [PATCH 25/77] added canonical URI to latest and parent metadata
 object

---
 schema/resources/Document.json | 18 +++++++++++++++---
 search/config.py               |  1 +
 search/domain/api.py           |  2 +-
 search/routes/api/serialize.py |  6 +++++-
 4 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/schema/resources/Document.json b/schema/resources/Document.json
index 569f24af..12f5bf8f 100644
--- a/schema/resources/Document.json
+++ b/schema/resources/Document.json
@@ -203,19 +203,31 @@
         },
         "href": {
           "type": "string",
-          "format": "uri"
+          "format": "uri",
+          "description": "Location of the detailed metadata record available via the API"
+        },
+        "canonical": {
+          "type": "string",
+          "format": "uri",
+          "description": "Canonical arXiv URI."
         },
         "paper_id": {
           "description": "Paper ID with version affix of latest version.",
           "type": "string"
         }
       }
-    }
+    },
+    "canonical": {
+      "type": "string",
+      "format": "uri",
+      "description": "Canonical arXiv URI."
+    },
   },
   "required": [
     "paper_id",
     "paper_id_v",
     "version",
-    "href"
+    "href",
+    "canonical"
   ]
 }
diff --git a/search/config.py b/search/config.py
index 6d306189..a1488c7b 100644
--- a/search/config.py
+++ b/search/config.py
@@ -233,6 +233,7 @@
 
 URLS = [
     ("pdf", "/pdf/<arxiv:paper_id>v<string:version>", BASE_SERVER),
+    ("abs", "/abs/<arxiv:paper_id>v<string:version>", BASE_SERVER),
     ("pdfonly", "/pdf/<arxiv:paper_id>v<string:version>", BASE_SERVER),
     ("dvi", "/dvi/<arxiv:paper_id>v<string:version>", BASE_SERVER),
     ("html", "/html/<arxiv:paper_id>v<string:version>", BASE_SERVER),
diff --git a/search/domain/api.py b/search/domain/api.py
index e806cbc2..bbe41f51 100644
--- a/search/domain/api.py
+++ b/search/domain/api.py
@@ -14,7 +14,7 @@ def get_default_extra_fields() -> List[str]:
 
 def get_required_fields() -> List[str]:
     """These fields should always be included."""
-    return ['paper_id', 'paper_id_v', 'version', 'href']
+    return ['paper_id', 'paper_id_v', 'version', 'href', 'canonical']
 
 
 @dataclass
diff --git a/search/routes/api/serialize.py b/search/routes/api/serialize.py
index 60eca787..369b1c94 100644
--- a/search/routes/api/serialize.py
+++ b/search/routes/api/serialize.py
@@ -54,6 +54,8 @@ def _transform_latest(cls, document: Document) -> Optional[dict]:
             "href": url_for("api.paper", paper_id=document.paper_id,
                             version=document.latest_version,
                             _external=True),
+            "canonical": url_for("abs", paper_id=document.paper_id,
+                                 version=document.latest_version),
             "version": document.latest_version
         }
 
@@ -119,7 +121,9 @@ def transform_document(cls, doc: Document,
             ('version', doc.version),
             ('latest', cls._transform_latest(doc)),
             ('href', url_for("api.paper", paper_id=doc.paper_id,
-                             version=doc.version, _external=True))
+                             version=doc.version, _external=True)),
+            ('canonical', url_for("abs", paper_id=doc.paper_id,
+                                  version=doc.version))
         ]
 
         # Only return fields that have been explicitly requested.

From b21e2f96288251ace6faf8438f1cb0d66d115e44 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Fri, 28 Sep 2018 11:26:08 -0400
Subject: [PATCH 26/77] added documentation for date parameters, and fixed a
 bug

---
 schema/search.yaml                 | 36 ++++++++++++++++++++++++++++++
 search/controllers/api/__init__.py |  4 +++-
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/schema/search.yaml b/schema/search.yaml
index 339299d9..9fdf24ef 100644
--- a/schema/search.yaml
+++ b/schema/search.yaml
@@ -192,6 +192,42 @@ paths:
             type: array
             items:
               type: string
+        - name: start_date
+          in: query
+          description: |
+            Limit results to papers submitted or announced (see ``date_type``)
+            on or after this date.
+          required: false
+          style: form
+          schema:
+            type: string
+            format: date
+        - name: end_date
+          in: query
+          description: |
+            Limit results to papers submitted or announced (see ``date_type``)
+            on or before this date.
+          required: false
+          style: form
+          schema:
+            type: string
+            format: date
+        - name: date_type
+          in: query
+          description: |
+            The date property used to apply ``start_date`` and ``end_date``
+            filters. Note that ``announced_date_first`` has only year and month
+            precision.
+          required: false
+          style: form
+          schema:
+            type: string
+            default: submitted_date
+            enum:
+              - submitted_date_first
+              - submitted_date
+              - announced_date_first
+
       responses:
         '200':
           description: All arXiv papers that respond to specified query.
diff --git a/search/controllers/api/__init__.py b/search/controllers/api/__init__.py
index 5ba7f1da..481357a5 100644
--- a/search/controllers/api/__init__.py
+++ b/search/controllers/api/__init__.py
@@ -124,7 +124,7 @@ def _get_fielded_terms(params: MultiDict) -> Optional[FieldedSearchList]:
 
 def _get_date_params(params: MultiDict) -> Optional[DateRange]:
     date_params = {}
-    for field in ['start_date', 'end_date', 'date_type']:
+    for field in ['start_date', 'end_date']:
         value = params.getlist(field)
         if not value:
             continue
@@ -136,6 +136,8 @@ def _get_date_params(params: MultiDict) -> Optional[DateRange]:
         except ValueError:
             raise BadRequest({'field': field, 'reason': 'invalid datetime'})
         date_params[field] = dt
+    if 'date_type' in params:
+        date_params['date_type'] = params.get('date_type')
     if date_params:
         return DateRange(**date_params)  # type: ignore
     return None

From 9c2127ad2ef351833543792660d60521c3cfa3b0 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Fri, 28 Sep 2018 11:27:00 -0400
Subject: [PATCH 27/77] fixed typo in jsonschema

---
 schema/resources/Document.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/schema/resources/Document.json b/schema/resources/Document.json
index 12f5bf8f..7b5f4a3d 100644
--- a/schema/resources/Document.json
+++ b/schema/resources/Document.json
@@ -221,7 +221,7 @@
       "type": "string",
       "format": "uri",
       "description": "Canonical arXiv URI."
-    },
+    }
   },
   "required": [
     "paper_id",

From b8606fedd280df3c643a146fabfd994afc33a14c Mon Sep 17 00:00:00 2001
From: Martin Lessmeister <mhl10@cornell.edu>
Date: Thu, 18 Oct 2018 14:16:38 -0400
Subject: [PATCH 28/77] Fix template to display secondaries

---
 search/templates/search/search-macros.html | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/search/templates/search/search-macros.html b/search/templates/search/search-macros.html
index a7b64000..199cb7c0 100644
--- a/search/templates/search/search-macros.html
+++ b/search/templates/search/search-macros.html
@@ -132,13 +132,13 @@
   <li class="arxiv-result">
     <div class="level is-marginless">
       <p class="list-title level-left">
-        {% set display_paper_id = result.paper_id if is_current(result) else result.paper_id_v %}
+        {%- set display_paper_id = result.paper_id if is_current(result) else result.paper_id_v -%}
         <a href="{{ external_url('browse', 'abstract', paper_id=display_paper_id) }}">arXiv:{{ display_paper_id }}</a>
         <span>&nbsp;{{ paper_format_links(result, external_url, display_paper_id) }}&nbsp;</span>
-        <span class="tag is-small {% if result.highlight.primary_classification %}search-hit{% else %}is-link{% endif %} tooltip is-tooltip-right" data-tooltip="{{ result.primary_classification.category.name }}">{{ result.primary_classification.category.id }}</span>
-        {% if result.secondary_category %}
-          {% for secondary in result.secondary_category %}
-            <span class="tag is-small is-grey">{{ secondary.archive }} {{ secondary.category }}</span>
+        <span class="tag is-small {% if result.highlight.primary_classification %}search-hit{% else %}is-link{% endif %} tooltip is-tooltip-top" data-tooltip="{{ result.primary_classification.category.name }}">{{ result.primary_classification.category.id }}</span>
+        {% if result.secondary_classification %}
+          {% for secondary in result.secondary_classification %}
+            <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="{{secondary.category.name}}">{{ secondary.category.id }}</span>
           {% endfor %}
         {% endif -%}
       </p>

From 6d9ba5fdfa179cd438e8f01a9d1b37057d7ab3b4 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Mon, 12 Nov 2018 07:15:14 -0500
Subject: [PATCH 29/77] ARXIVNG-1348 updated requests to >= 2.20.0

---
 Pipfile                                |   2 +-
 Pipfile.lock                           |  61 ++--
 schema/resources/Document.json         | 383 +++++++++++++------------
 schema/resources/DocumentMetadata.json |   3 +-
 schema/resources/DocumentSet.json      |   3 +-
 schema/resources/Person.json           |   1 -
 schema/search.yaml                     |   4 +-
 7 files changed, 226 insertions(+), 231 deletions(-)

diff --git a/Pipfile b/Pipfile
index fe3ff4c6..ac576742 100644
--- a/Pipfile
+++ b/Pipfile
@@ -38,7 +38,7 @@ pyflakes = "==1.6.0"
 pylama = "==7.4.3"
 python-dateutil = "==2.6.1"
 pytz = "==2017.3"
-requests = "==2.18.4"
+requests = ">=2.20.0"
 "s3transfer" = "==0.1.13"
 snowballstemmer = "==1.2.1"
 thrift = "==0.11.0"
diff --git a/Pipfile.lock b/Pipfile.lock
index 1ea189d1..6518520d 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "d1b1281c8c786c7f3ca24513df4fea25993b9b4bb08fcf0c66586d6455701ac4"
+            "sha256": "101c09af409fbddc9ec18be854f563ff382ddf6fec3a8781617c817b8d4c68b9"
         },
         "pipfile-spec": 6,
         "requires": {},
@@ -30,11 +30,11 @@
         },
         "bleach": {
             "hashes": [
-                "sha256:0ee95f6167129859c5dce9b1ca291ebdb5d8cd7e382ca0e237dfd0dad63f63d8",
-                "sha256:24754b9a7d530bf30ce7cbc805bc6cce785660b4a10ff3a43633728438c105ab"
+                "sha256:48d39675b80a75f6d1c3bdbffec791cf0bbbab665cf01e20da701c77de278718",
+                "sha256:73d26f018af5d5adcdabf5c1c974add4361a9c76af215fe32fdec8a6fc5fb9b9"
             ],
             "index": "pypi",
-            "version": "==2.1.4"
+            "version": "==3.0.2"
         },
         "boto": {
             "hashes": [
@@ -180,13 +180,6 @@
             "index": "pypi",
             "version": "==0.3.3"
         },
-        "html5lib": {
-            "hashes": [
-                "sha256:20b159aa3badc9d5ee8f5c647e5efd02ed2a66ab8d354930bd9ff139fc1dc0a3",
-                "sha256:66cb0dcfdbbc4f9c3ba1a63fdb511ffdbd4f513b2b6d81b80cd26ce6b3fb3736"
-            ],
-            "version": "==1.0.1"
-        },
         "idna": {
             "hashes": [
                 "sha256:2c6a5de3089009e3da7c5dde64a141dbc8551d5b7f6cf4ed7c2568d0cc520a8f",
@@ -422,11 +415,11 @@
         },
         "requests": {
             "hashes": [
-                "sha256:6a1b267aa90cac58ac3a765d067950e7dbbf75b1da07e895d1f594193a40a38b",
-                "sha256:9c443e7324ba5b85070c4a818ade28bfabedf16ea10206da1132edaa6dda237e"
+                "sha256:65b3a120e4329e33c9889db89c80976c5272f56ea92d3e74da8a463992e3ff54",
+                "sha256:ea881206e59f41dbd0bd445437d792e43906703fff75ca8ff43ccdb11f33f263"
             ],
             "index": "pypi",
-            "version": "==2.18.4"
+            "version": "==2.20.1"
         },
         "s3transfer": {
             "hashes": [
@@ -453,9 +446,9 @@
         },
         "sqlalchemy": {
             "hashes": [
-                "sha256:c5951d9ef1d5404ed04bae5a16b60a0779087378928f997a294d1229c6ca4d3e"
+                "sha256:9de7c7dabcf06319becdb7e15099c44e5e34ba7062f9ba10bc00e562f5db3d04"
             ],
-            "version": "==1.2.12"
+            "version": "==1.2.14"
         },
         "thrift": {
             "hashes": [
@@ -541,10 +534,10 @@
     "develop": {
         "alabaster": {
             "hashes": [
-                "sha256:674bb3bab080f598371f4443c5008cbfeb1a5e622dd312395d2d82af2c54c456",
-                "sha256:b63b1f4dc77c074d386752ec4a8a7517600f6c0db8cd42980cae17ab7b3275d7"
+                "sha256:446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359",
+                "sha256:a661d72d58e6ea8a57f7a86e37d86716863ee5e92788398526d58b26a4e4dc02"
             ],
-            "version": "==0.7.11"
+            "version": "==0.7.12"
         },
         "babel": {
             "hashes": [
@@ -617,11 +610,11 @@
         },
         "coveralls": {
             "hashes": [
-                "sha256:9dee67e78ec17b36c52b778247762851c8e19a893c9a14e921a2fc37f05fac22",
-                "sha256:aec5a1f5e34224b9089664a1b62217732381c7de361b6ed1b3c394d7187b352a"
+                "sha256:ab638e88d38916a6cedbf80a9cd8992d5fa55c77ab755e262e00b36792b7cd6d",
+                "sha256:b2388747e2529fa4c669fb1e3e2756e4e07b6ee56c7d9fce05f35ccccc913aa0"
             ],
             "index": "pypi",
-            "version": "==1.5.0"
+            "version": "==1.5.1"
         },
         "docopt": {
             "hashes": [
@@ -684,10 +677,10 @@
         },
         "pyparsing": {
             "hashes": [
-                "sha256:905d8090c335314568b5faee0025b1829f27bb974604a5762a6cdef3a7dfc3b7",
-                "sha256:f493ee323be1e94929416b3585eefcc04943115cecbaaa35a8c86d1a2368af19"
+                "sha256:40856e74d4987de5d01761a22d1621ae1c7f8774585acae358aa5c5936c6c90b",
+                "sha256:f353aab21fd474459d97b709e527b5571314ee5f067441dc9f88e33eecd96592"
             ],
-            "version": "==2.2.1"
+            "version": "==2.3.0"
         },
         "pytz": {
             "hashes": [
@@ -706,11 +699,11 @@
         },
         "requests": {
             "hashes": [
-                "sha256:6a1b267aa90cac58ac3a765d067950e7dbbf75b1da07e895d1f594193a40a38b",
-                "sha256:9c443e7324ba5b85070c4a818ade28bfabedf16ea10206da1132edaa6dda237e"
+                "sha256:65b3a120e4329e33c9889db89c80976c5272f56ea92d3e74da8a463992e3ff54",
+                "sha256:ea881206e59f41dbd0bd445437d792e43906703fff75ca8ff43ccdb11f33f263"
             ],
             "index": "pypi",
-            "version": "==2.18.4"
+            "version": "==2.20.1"
         },
         "six": {
             "hashes": [
@@ -729,19 +722,19 @@
         },
         "sphinx": {
             "hashes": [
-                "sha256:652eb8c566f18823a022bb4b6dbc868d366df332a11a0226b5bc3a798a479f17",
-                "sha256:d222626d8356de702431e813a05c68a35967e3d66c6cd1c2c89539bb179a7464"
+                "sha256:120732cbddb1b2364471c3d9f8bfd4b0c5b550862f99a65736c77f970b142aea",
+                "sha256:b348790776490894e0424101af9c8413f2a86831524bd55c5f379d3e3e12ca64"
             ],
             "index": "pypi",
-            "version": "==1.8.1"
+            "version": "==1.8.2"
         },
         "sphinx-autodoc-typehints": {
             "hashes": [
-                "sha256:1a9df6cb3ba72453ea4bfbe96ea887abc0d796b2ce9508c2189217a1bb69b366",
-                "sha256:46cc9e985ee6d8bbbd07fffd95b815c39a72df6afb600f59671f85f7340e7d0d"
+                "sha256:0b050d782dd8c31da7078d2477f0682d51caa265c812329e7e07e74c2adb6f60",
+                "sha256:978c5f5ccc8cdf398efedfc3e77d08abeb0a2f4754813e77c0cbb0cd774fca82"
             ],
             "index": "pypi",
-            "version": "==1.3.0"
+            "version": "==1.4.0"
         },
         "sphinxcontrib-websupport": {
             "hashes": [
diff --git a/schema/resources/Document.json b/schema/resources/Document.json
index 7b5f4a3d..1e8b5a24 100644
--- a/schema/resources/Document.json
+++ b/schema/resources/Document.json
@@ -1,233 +1,238 @@
 {
-  "$schema": "http://json-schema.org/schema#",
-  "title": "Document",
-  "description": "Schema for arXiv document metadata returned by the search API.",
-  "definitions": {
-    "category": {
-      "type": "object",
-      "properties": {
-        "id": {
+  "Document": {
+    "title": "Document",
+    "description": "Schema for arXiv document metadata returned by the search API.",
+    "type": "object",
+    "properties": {
+      "abs_categories": {
+        "description": "Categories as they would appear on the /abs page",
+        "type": "string"
+      },
+      "abstract": {
+        "type": "string"
+      },
+      "acm_class": {
+        "description": "Classifications from ACM Computing Classification System",
+        "type": "array",
+        "items": {
           "type": "string"
         },
-        "name": {
-          "type": "string"
+        "uniqueItems": true
+      },
+      "owners": {
+        "items": {
+          "type": "object",
+          "$ref": "./Person.json"
         }
       },
-      "required": ["id", "name"]
-    },
-    "archive": {
-      "type": "object",
-      "properties": {
-        "id": {
-          "type": "string"
+      "authors": {
+        "type": "array",
+        "items": {
+          "type": "object",
+          "$ref": "./Person.json"
         },
-        "name": {
-          "type": "string"
+        "minItems": 1
+      },
+      "comments": {
+        "type": "string"
+      },
+      "submitted_date": {
+        "description": "Date this version of paper was submitted.",
+        "type": "string",
+        "format": "date-time"
+      },
+      "announced_date_first": {
+        "description": "Year and month (``%Y-%m``) the paper was originally announced.",
+        "type": "string"
+      },
+      "paper_id": {
+        "description": "arXiv paper identifier without version affix.",
+        "type": "string"
+      },
+      "paper_id_v": {
+        "description": "arXiv paper identifier with version affix.",
+        "type": "string"
+      },
+      "doi": {
+        "type": "string"
+      },
+      "formats": {
+        "description": "Derivative paper formats available to users",
+        "type": "array",
+        "minItems": 0,
+        "items": {
+          "type": "object",
+          "properties": {
+            "format": {
+              "type": "string"
+            },
+            "href": {
+              "type": "string",
+              "format": "uri"
+            }
+          }
         }
       },
-      "required": ["id", "name"]
-    },
-    "group": {
-      "type": "object",
-      "properties": {
-        "id": {
+      "href": {
+        "type": "string",
+        "format": "uri"
+      },
+      "fulltext": {
+        "type": "string"
+      },
+      "is_current": {
+        "type": "boolean"
+      },
+      "is_withdrawn": {
+        "type": "boolean"
+      },
+      "journal_ref": {
+        "type": "string"
+      },
+      "license": {
+        "type": "object",
+        "properties": {
+          "href": {
+            "type": "string",
+            "nullable": true
+          },
+          "label": {
+            "type": "string",
+            "nullable": true
+          }
+        }
+      },
+      "msc_class": {
+        "description": "Classifications from American Mathematical Society Mathematical Subject Classification (MSC)",
+        "type": "array",
+        "items": {
           "type": "string"
         },
-        "name": {
-          "type": "string"
-        }
+        "uniqueItems": true
       },
-      "required": ["id", "name"]
-    },
-    "classification": {
-      "type": "object",
-      "properties": {
-        "archive": {"$ref": "#/definitions/archive"},
-        "group": {"$ref": "#/definitions/group"},
-        "category": {"$ref": "#/definitions/category"}
-      }
-    }
-  },
-  "type": "object",
-  "properties": {
-    "abs_categories": {
-      "description": "Categories as they would appear on the /abs page",
-      "type": "string"
-    },
-    "abstract": {
-      "type": "string"
-    },
-    "acm_class": {
-      "description": "Classifications from ACM Computing Classification System",
-      "type": "array",
-      "items": {
+      "primary_classification": {
+        "$ref": "./Document.json#/definitions/classification"
+      },
+      "secondary_classification": {
+        "type": "array",
+        "items": {"$ref": "./Document.json#/definitions/classification"}
+      },
+      "report_num": {
         "type": "string"
       },
-      "uniqueItems": true
-    },
-    "owners": {
-      "items": {
-        "type": "object",
-        "$ref": "./Person.json"
-      }
-    },
-    "authors": {
-      "items": {
+      "source": {
+        "properties": {
+          "flags": {
+            "type": "string",
+            "nullable": true
+          },
+          "format": {
+            "type": "string",
+            "nullable": true
+          },
+          "size_bytes": {
+            "minimum": 0,
+            "type": "integer"
+          }
+        },
+        "required": ["size_bytes"]
+      },
+      "submitter": {
+        "description": "Submitter data. Name may not match those associated with user account, since user data is copied to submission data at the time of submission creation.",
         "type": "object",
         "$ref": "./Person.json"
       },
-      "minItems": 1,
-      "type": "array"
-    },
-    "comments": {
-      "type": ["string"]
-    },
-    "submitted_date": {
-      "description": "Date this version of paper was submitted.",
-      "type": "string",
-      "format": "date-time"
-    },
-    "announced_date_first": {
-      "desription": "Year and month (``%Y-%m``) the paper was originally announced.",
-      "type": "string"
-    },
-    "paper_id": {
-      "description": "arXiv paper identifier without version affix.",
-      "type": "string"
-    },
-    "paper_id_v": {
-      "description": "arXiv paper identifier with version affix.",
-      "type": "string"
-    },
-    "doi": {
-      "type": "string"
-    },
-    "formats": {
-      "description": "Derivative paper formats available to users",
-      "type": "array",
-      "minItems": 0,
-      "items": {
+      "title": {
+        "type": "string"
+      },
+      "version": {
+        "description": "The version number for this paper.",
+        "minimum": 1,
+        "type": "integer"
+      },
+      "latest": {
+        "description": "arXiv paper identifier (with version affix) of latest version of this paper.",
         "type": "object",
         "properties": {
-          "format": {
-            "type": "string"
+          "version": {
+            "description": "Number of the latest version of this paper.",
+            "minimum": 1,
+            "type": "integer"
           },
           "href": {
             "type": "string",
-            "format": "uri"
+            "format": "uri",
+            "description": "Location of the detailed metadata record available via the API"
+          },
+          "canonical": {
+            "type": "string",
+            "format": "uri",
+            "description": "Canonical arXiv URI."
+          },
+          "paper_id": {
+            "description": "Paper ID with version affix of latest version.",
+            "type": "string"
           }
         }
+      },
+      "canonical": {
+        "type": "string",
+        "format": "uri",
+        "description": "Canonical arXiv URI."
       }
     },
-    "href": {
-      "type": "string",
-      "format": "uri"
-    },
-    "fulltext": {
-      "type": "string"
-    },
-    "is_current": {
-      "type": "boolean"
-    },
-    "is_withdrawn": {
-      "type": "boolean"
-    },
-    "journal_ref": {
-      "type": ["string"]
-    },
-    "license": {
+    "required": [
+      "paper_id",
+      "paper_id_v",
+      "version",
+      "href",
+      "canonical"
+    ]
+  },
+  "definitions": {
+    "category": {
       "type": "object",
       "properties": {
-        "href": {
-          "type": ["string", "null"]
+        "fid": {
+          "type": "string"
         },
-        "label": {
-          "type": ["string", "null"]
+        "name": {
+          "type": "string"
         }
-      }
-    },
-    "msc_class": {
-      "description": "Classifications from American Mathematical Society Mathematical Subject Classification (MSC)",
-      "type": "array",
-      "items": {
-        "type": "string"
       },
-      "uniqueItems": true
-    },
-    "primary_classification": {
-      "$ref": "./Document.json#/definitions/classification"
-    },
-    "secondary_classification": {
-      "type": "array",
-      "items": {"$ref": "./Document.json#/definitions/classification"}
+      "required": ["fid", "name"]
     },
-    "report_num": {
-      "type": "string"
-    },
-    "source": {
+    "archive": {
+      "type": "object",
       "properties": {
-        "flags": {
-          "type": ["string", "null"]
-        },
-        "format": {
-          "type": ["string", "null"]
+        "fid": {
+          "type": "string"
         },
-        "size_bytes": {
-          "minimum": 0,
-          "type": "integer"
+        "name": {
+          "type": "string"
         }
       },
-      "required": ["size_bytes"]
-    },
-    "submitter": {
-      "description": "Submitter data. Name may not match those associated with user account, since user data is copied to submission data at the time of submission creation.",
-      "type": "object",
-      "$ref": "./Person.json"
+      "required": ["fid", "name"]
     },
-    "title": {
-      "type": "string"
-    },
-    "version": {
-      "description": "The version number for this paper.",
-      "minimum": 1,
-      "type": "integer"
-    },
-    "latest": {
-      "description": "arXiv paper identifier (with version affix) of latest version of this paper.",
+    "group": {
       "type": "object",
       "properties": {
-        "version": {
-          "description": "Number of the latest version of this paper.",
-          "minimum": 1,
-          "type": "integer"
-        },
-        "href": {
-          "type": "string",
-          "format": "uri",
-          "description": "Location of the detailed metadata record available via the API"
-        },
-        "canonical": {
-          "type": "string",
-          "format": "uri",
-          "description": "Canonical arXiv URI."
+        "fid": {
+          "type": "string"
         },
-        "paper_id": {
-          "description": "Paper ID with version affix of latest version.",
+        "name": {
           "type": "string"
         }
-      }
+      },
+      "required": ["fid", "name"]
     },
-    "canonical": {
-      "type": "string",
-      "format": "uri",
-      "description": "Canonical arXiv URI."
+    "classification": {
+      "type": "object",
+      "properties": {
+        "archive": {"$ref": "#/definitions/archive"},
+        "group": {"$ref": "#/definitions/group"},
+        "category": {"$ref": "#/definitions/category"}
+      }
     }
-  },
-  "required": [
-    "paper_id",
-    "paper_id_v",
-    "version",
-    "href",
-    "canonical"
-  ]
+  }
 }
diff --git a/schema/resources/DocumentMetadata.json b/schema/resources/DocumentMetadata.json
index ee7f1203..0f6fb3ab 100644
--- a/schema/resources/DocumentMetadata.json
+++ b/schema/resources/DocumentMetadata.json
@@ -1,5 +1,4 @@
 {
-    "$schema": "http://json-schema.org/schema#",
     "title": "DocumentMetadata",
     "description": "Schema for arXiv document metadata provided by the docmeta endpoint.",
     "definitions": {
@@ -148,7 +147,7 @@
             "minItems": 0
         },
         "announced_first": {
-          "desription": "Date (year and month) the paper was originally announced",
+          "description": "Date (year and month) the paper was originally announced",
           "type": "string"
         },
         "updated_date": {
diff --git a/schema/resources/DocumentSet.json b/schema/resources/DocumentSet.json
index 1b53b35d..b664a7b1 100644
--- a/schema/resources/DocumentSet.json
+++ b/schema/resources/DocumentSet.json
@@ -1,5 +1,4 @@
 {
-  "$schema": "http://json-schema.org/schema#",
   "title": "DocumentSet",
   "description": "A set of documents that respond to a query.",
   "type": "object",
@@ -26,7 +25,7 @@
       "type": "array",
       "items": {
         "type": "object",
-        "$ref": "Document.json"
+        "$ref": "Document.json#Document"
       }
     }
   }
diff --git a/schema/resources/Person.json b/schema/resources/Person.json
index 3b386c00..dec4cec1 100644
--- a/schema/resources/Person.json
+++ b/schema/resources/Person.json
@@ -1,5 +1,4 @@
 {
-  "$schema": "http://json-schema.org/schema#",
   "title": "Person",
   "description": "Schema for person in metadata returned by the search API.",
   "type": "object",
diff --git a/schema/search.yaml b/schema/search.yaml
index 9fdf24ef..e77367db 100644
--- a/schema/search.yaml
+++ b/schema/search.yaml
@@ -234,7 +234,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: './resources/DocumentSet.json#DocumentSet'
+                $ref: './resources/DocumentSet.json#'
         default:
           description: unexpected error
           content:
@@ -255,7 +255,7 @@ paths:
           required: true
           schema:
             type: string
-      response:
+      responses:
         '200':
           description: Metadata about the requested arXiv paper.
           content:

From c6a0ed246c6c944ec53ff5f2b64fe7e73aca9996 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Mon, 12 Nov 2018 07:28:40 -0500
Subject: [PATCH 30/77] APPLICATION_ROOT defaults to /; updated Flask to v1

---
 Pipfile                        |  4 ++--
 Pipfile.lock                   | 14 +++++++-------
 schema/resources/Document.json | 12 ++++++------
 search/config.py               |  2 +-
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/Pipfile b/Pipfile
index ac576742..8f36bd36 100644
--- a/Pipfile
+++ b/Pipfile
@@ -17,7 +17,7 @@ dataclasses = "==0.4"
 docutils = "==0.14"
 elasticsearch = "==6.2.0"
 elasticsearch-dsl = "==6.1.0"
-flask = "==0.12.2"
+flask = "==1.0.2"
 "flask-s3" = "==0.3.3"
 idna = "==2.6"
 ipaddress = "==1.0.19"
@@ -45,7 +45,7 @@ thrift = "==0.11.0"
 thrift-connector = "==0.23"
 typed-ast = "==1.1.0"
 "urllib3" = "==1.22"
-werkzeug = "==0.13"
+werkzeug = ">=0.14"
 wtforms = "==2.1"
 bleach = "*"
 lxml = "*"
diff --git a/Pipfile.lock b/Pipfile.lock
index 6518520d..def0b52b 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "101c09af409fbddc9ec18be854f563ff382ddf6fec3a8781617c817b8d4c68b9"
+            "sha256": "37d01fb3e6bdfcf42c7d9a70424093f1bd5226875976c54f6b7f5d0f03e65866"
         },
         "pipfile-spec": 6,
         "requires": {},
@@ -165,11 +165,11 @@
         },
         "flask": {
             "hashes": [
-                "sha256:0749df235e3ff61ac108f69ac178c9770caeaccad2509cb762ce1f65570a8856",
-                "sha256:49f44461237b69ecd901cc7ce66feea0319b9158743dd27a2899962ab214dac1"
+                "sha256:2271c0070dbcb5275fad4a82e29f23ab92682dc45f9dfbc22c02ba9b9322ce48",
+                "sha256:a080b744b7e345ccfcbc77954861cb05b3c63786e93f2b3875e0913d44b43f05"
             ],
             "index": "pypi",
-            "version": "==0.12.2"
+            "version": "==1.0.2"
         },
         "flask-s3": {
             "hashes": [
@@ -517,11 +517,11 @@
         },
         "werkzeug": {
             "hashes": [
-                "sha256:6246e5fc98a505824113fb6aca993d45ea284a2bcffdc2c65d0c538e53e4abd3",
-                "sha256:f3000aa146ce8a9da8ca3e978e0e931c2a58eb56c323a5efb6b4307f7832b549"
+                "sha256:c3fd7a7d41976d9f44db327260e263132466836cef6f91512889ed60ad26557c",
+                "sha256:d5da73735293558eb1651ee2fddc4d0dedcfa06538b8813a2e20011583c9e49b"
             ],
             "index": "pypi",
-            "version": "==0.13"
+            "version": "==0.14.1"
         },
         "wtforms": {
             "hashes": [
diff --git a/schema/resources/Document.json b/schema/resources/Document.json
index 1e8b5a24..c55ead5f 100644
--- a/schema/resources/Document.json
+++ b/schema/resources/Document.json
@@ -193,38 +193,38 @@
     "category": {
       "type": "object",
       "properties": {
-        "fid": {
+        "id": {
           "type": "string"
         },
         "name": {
           "type": "string"
         }
       },
-      "required": ["fid", "name"]
+      "required": ["id", "name"]
     },
     "archive": {
       "type": "object",
       "properties": {
-        "fid": {
+        "id": {
           "type": "string"
         },
         "name": {
           "type": "string"
         }
       },
-      "required": ["fid", "name"]
+      "required": ["id", "name"]
     },
     "group": {
       "type": "object",
       "properties": {
-        "fid": {
+        "id": {
           "type": "string"
         },
         "name": {
           "type": "string"
         }
       },
-      "required": ["fid", "name"]
+      "required": ["id", "name"]
     },
     "classification": {
       "type": "object",
diff --git a/search/config.py b/search/config.py
index a1488c7b..acfb5141 100644
--- a/search/config.py
+++ b/search/config.py
@@ -57,7 +57,7 @@
 application context.
 """
 
-APPLICATION_ROOT = os.environ.get('APPLICATION_ROOT', None)
+APPLICATION_ROOT = os.environ.get('APPLICATION_ROOT', '/')
 """
 If the application does not occupy a whole domain or subdomain this can be set
 to the path where the application is configured to live. This is for session

From b8cadf277dd8143c25ec6f83bd361d245427b01c Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Mon, 12 Nov 2018 08:29:45 -0500
Subject: [PATCH 31/77] ARXIVNG-1347 added cross-list search option in
 advanced; ARXIVNG-1278 all-fields includes cross-list search

---
 search/controllers/advanced/forms.py  | 20 ++++----------------
 search/domain/advanced.py             | 17 +++++++++++++++++
 search/routes/ui.py                   |  1 +
 search/services/index/highlighting.py |  4 ++++
 search/services/index/prepare.py      | 22 +++++++++++++++++++++-
 5 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/search/controllers/advanced/forms.py b/search/controllers/advanced/forms.py
index 3512ad54..40b4eeea 100644
--- a/search/controllers/advanced/forms.py
+++ b/search/controllers/advanced/forms.py
@@ -13,7 +13,7 @@
 from wtforms import widgets
 
 from arxiv import taxonomy
-from search.domain import DateRange
+from search.domain import DateRange, AdvancedQuery
 from search.controllers.util import does_not_start_with_wildcard, \
                                     strip_white_space, has_balanced_quotes
 
@@ -73,21 +73,7 @@ class FieldForm(Form):
     operator = SelectField("Operator", choices=[
         ('AND', 'AND'), ('OR', 'OR'), ('NOT', 'NOT')
     ], default='AND')
-    field = SelectField("Field", choices=[
-        ('title', 'Title'),
-        ('author', 'Author(s)'),
-        ('abstract', 'Abstract'),
-        ('comments', 'Comments'),
-        ('journal_ref', 'Journal reference'),
-        ('acm_class', 'ACM classification'),
-        ('msc_class', 'MSC classification'),
-        ('report_num', 'Report number'),
-        ('paper_id', 'arXiv identifier'),
-        ('doi', 'DOI'),
-        ('orcid', 'ORCID'),
-        ('author_id', 'arXiv author ID'),
-        ('all', 'All fields')
-    ])
+    field = SelectField("Field", choices=AdvancedQuery.SUPPORTED_FIELDS)
 
 
 class ClassificationForm(Form):
@@ -95,6 +81,8 @@ class ClassificationForm(Form):
 
     # pylint: disable=too-few-public-methods
 
+    # TODO: this should not be hard-coded!
+    #
     # Map arXiv archives to fields on this form. Ideally we would autogenerate
     # form fields based on the arXiv taxonomy, but this can't easily happen
     # until we replace the classic-style advanced interface with faceted
diff --git a/search/domain/advanced.py b/search/domain/advanced.py
index 25b51c9d..2d2dda30 100644
--- a/search/domain/advanced.py
+++ b/search/domain/advanced.py
@@ -34,6 +34,23 @@ class AdvancedQuery(Query):
 
     An advanced query contains fielded search terms and boolean operators.
     """
+    
+    SUPPORTED_FIELDS = [
+        ('title', 'Title'),
+        ('author', 'Author(s)'),
+        ('abstract', 'Abstract'),
+        ('comments', 'Comments'),
+        ('journal_ref', 'Journal reference'),
+        ('acm_class', 'ACM classification'),
+        ('msc_class', 'MSC classification'),
+        ('report_num', 'Report number'),
+        ('paper_id', 'arXiv identifier'),
+        ('cross_list_category', 'Cross-list category'),
+        ('doi', 'DOI'),
+        ('orcid', 'ORCID'),
+        ('author_id', 'arXiv author ID'),
+        ('all', 'All fields')
+    ]
 
     date_range: Optional[DateRange] = None
     primary_classification: ClassificationList = field(
diff --git a/search/routes/ui.py b/search/routes/ui.py
index efe8341d..5776be84 100644
--- a/search/routes/ui.py
+++ b/search/routes/ui.py
@@ -75,6 +75,7 @@ def search(archives: Optional[List[str]] = None) -> Union[str, Response]:
     response, code, headers = simple.search(request.args, archives)
     logger.debug(f"controller returned code: {code}")
     if code == status.HTTP_200_OK:
+        logger.debug('OK! %s', response)
         return render_template("search/search.html", pagetitle="Search",
                                archives=archives, **response)
     elif (code == status.HTTP_301_MOVED_PERMANENTLY
diff --git a/search/services/index/highlighting.py b/search/services/index/highlighting.py
index 65fa6e42..757cbdcc 100644
--- a/search/services/index/highlighting.py
+++ b/search/services/index/highlighting.py
@@ -69,6 +69,8 @@ def highlight(search: Search) -> Search:
 
     search = search.highlight('primary_classification*', type='plain',
                               number_of_fragments=0)
+    search = search.highlight('secondary_classification*', type='plain',
+                              number_of_fragments=0)
     return search
 
 
@@ -182,6 +184,8 @@ def add_highlighting(result: dict, raw: Response) -> dict:
 
         if 'primary_classification' in field:
             field = 'primary_classification'
+        if 'secondary_classification' in field:
+            field = 'secondary_classification'
 
         # Non-TeX searches may hit inside of TeXisms. Highlighting those
         # fragments (i.e. inserting HTML) will break MathJax rendering.
diff --git a/search/services/index/prepare.py b/search/services/index/prepare.py
index 4f1fa313..c7485488 100644
--- a/search/services/index/prepare.py
+++ b/search/services/index/prepare.py
@@ -132,6 +132,24 @@ def _query_primary(term: str, operator: str = 'and') -> Q:
     )
 
 
+def _query_secondary(term: str, operator: str = 'and') -> Q:
+    if operator == 'or':
+        q = reduce(ior, [(
+            Q("match", **{"secondary_classification__category__id": {"query": part, "operator": operator}})
+            | Q("wildcard", **{"secondary_classification.category.name": f"*{part}*"})
+            | Q("match", **{"secondary_classification__archive__id": {"query": part, "operator": operator}})
+            | Q("wildcard", **{"secondary_classification.archive.name": f"*{part}*"})
+        ) for part in term.split()])
+    else:
+        q = (
+            Q("match", **{"secondary_classification__category__id": {"query": term, "operator": operator}})
+            | Q("match", **{"secondary_classification__category__name": {"query": term, "operator": operator}})
+            | Q("match", **{"secondary_classification__archive__id": {"query": term, "operator": operator}})
+            | Q("match", **{"secondary_classification__archive__name": {"query": term, "operator": operator}})
+        )
+    return Q("nested", path="secondary_classification", query=q)
+
+
 def _query_paper_id(term: str, operator: str = 'and') -> Q:
     operator = operator.lower()
     logger.debug(f'query paper ID with: {term}')
@@ -220,7 +238,8 @@ def _query_all_fields(term: str) -> Q:
         _query_report_num(term, operator='or'),
         _query_acm_class(term, operator='or'),
         _query_msc_class(term, operator='or'),
-        _query_primary(term, operator='or')
+        _query_primary(term, operator='or'),
+        _query_secondary(term, operator='or')
     ]
 
     # If the whole query matches on a specific field, we should consider that
@@ -372,6 +391,7 @@ def _to_q(classification: Classification) -> Q:
     ('report_num', _query_report_num),
     ('acm_class', _query_acm_class),
     ('msc_class', _query_msc_class),
+    ('cross_list_category', _query_secondary),
     ('doi', _query_doi),
     ('paper_id', _query_paper_id),
     ('orcid', orcid_query),

From c72a927e488c4cf73ad1fd364ef533fa823212b9 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Mon, 12 Nov 2018 08:33:24 -0500
Subject: [PATCH 32/77] added handling of OutsideAllowedRange exception

---
 search/controllers/advanced/__init__.py | 5 +++++
 search/controllers/simple/__init__.py   | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/search/controllers/advanced/__init__.py b/search/controllers/advanced/__init__.py
index 22f23d09..f26a92cf 100644
--- a/search/controllers/advanced/__init__.py
+++ b/search/controllers/advanced/__init__.py
@@ -130,6 +130,11 @@ def search(request_params: MultiDict) -> Response:
                     "your search again.  If this problem persists, please "
                     "report it to help@arxiv.org."
                 ) from e
+            except index.OutsideAllowedRange as e:
+                raise BadRequest(
+                    "Hello clever friend. You can't get results in that range"
+                    " right now."
+                ) from e
             response_data['query'] = q
         else:
             logger.debug('form is invalid: %s', str(form.errors))
diff --git a/search/controllers/simple/__init__.py b/search/controllers/simple/__init__.py
index 3a2739a4..1a2f8f54 100644
--- a/search/controllers/simple/__init__.py
+++ b/search/controllers/simple/__init__.py
@@ -154,6 +154,11 @@ def search(request_params: MultiDict,
                 "search again.  If this problem persists, please report it to "
                 "help@arxiv.org."
             ) from e
+        except index.OutsideAllowedRange as e:
+            raise BadRequest(
+                "Hello clever friend. You can't get results in that range"
+                " right now."
+            ) from e
 
         except Exception as e:
             logger.error('Unhandled exception: %s', str(e))

From fb79a89205514b6294d8bc7da1864718ac9e2986 Mon Sep 17 00:00:00 2001
From: eaw82 <eaw82@cornell.edu>
Date: Mon, 12 Nov 2018 08:52:43 -0500
Subject: [PATCH 33/77] ARXIVNG-1277 add styling for secondary search categories

---
 search/static/css/search.css               |  6 ++++++
 search/static/css/search.css.map           |  2 +-
 search/static/sass/search.sass             |  6 ++++++
 search/templates/search/search-macros.html | 24 ++++++++++++++--------
 4 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/search/static/css/search.css b/search/static/css/search.css
index c65ebeba..d94214d1 100644
--- a/search/static/css/search.css
+++ b/search/static/css/search.css
@@ -2,6 +2,12 @@
   margin-bottom: 1.25em; }
   .content .arxiv-result p {
     margin-bottom: 0.25em; }
+  .content .arxiv-result .tags {
+    margin-bottom: 0.25rem; }
+    .content .arxiv-result .tags:last-child {
+      margin-bottom: 0; }
+    .content .arxiv-result .tags:not(:last-child) {
+      margin-bottom: 0; }
   .content .arxiv-result span.tag {
     height: 1.25rem; }
   .content .arxiv-result .list-title {
diff --git a/search/static/css/search.css.map b/search/static/css/search.css.map
index e133266d..034b7d33 100644
--- a/search/static/css/search.css.map
+++ b/search/static/css/search.css.map
@@ -1,6 +1,6 @@
 {
 "version": 3,
-"mappings": "AACE,sBAAa;EACX,aAAa,EAAE,MAAM;EACrB,wBAAC;IACC,aAAa,EAAE,MAAK;EACtB,+BAAQ;IACN,MAAM,EAAE,MAAM;EAChB,kCAAW;IACT,SAAS,EAAE,MAAM;;AAEvB,WAAW;EACT,KAAK,EAAE,OAAO;EACd,gBAAgB,EAAE,OAAO;EACzB,WAAW,EAAE,IAAI;;;AAInB,aAAa;EACX,WAAW,EAAE,SAAS;;AAExB,WAAW;EACT,UAAU,EAAE,IAAI;EAChB,aAAa,EAAE,IAAI;;AAErB,gBAAgB;EACd,MAAM,EAAE,YAAY;;AAEtB,iBAAiB;EACf,UAAU,EAAE,IAAI;EAChB,aAAa,EAAE,IAAI;;AAGnB,oCAAoC;EADtC,mBAAmB;IAEf,WAAW,EAAE,eAAe;IAC5B,YAAY,EAAE,eAAe;AAC/B,oCAAoC;EAJtC,mBAAmB;IAKf,WAAW,EAAE,gBAAgB;IAC7B,YAAY,EAAE,gBAAgB;;AAElC,mBAAmB;EACjB,UAAU,EAAE,IAAI;;AAElB,wBAA+D;EAC7D,MAAM,EAAE,CAAC;;AAEX,SAAS;EACP,MAAM,EAAE,iBAAkC;EAC1C,aAAa,EAAE,GAAG;EAClB,OAAO,EAAE,iBAAgB;EACzB,MAAM,EAAE,KAAK;;AAEf,oBAAoB;EAClB,MAAM,EAAE,iBAAiB;;AAE3B,OAAO;EACL,WAAW,EAAE,GAAG;EAChB,gBAAgB,EAAE,OAAO;EACzB,OAAO,EAAE,OAAM;EACf,WAAW,EAAE,MAAK;;AAEpB,YAAY;EACV,WAAW,EAAE,QAAQ;;AAEvB,aAAa;EACX,MAAM,EAAE,CAAC;EACT,IAAI,EAAE,gBAAa;EACnB,MAAM,EAAE,GAAG;EACX,KAAK,EAAE,CAAC;EACR,MAAM,EAAE,IAAI;EACZ,QAAQ,EAAE,MAAM;EAChB,OAAO,EAAE,CAAC;EACV,QAAQ,EAAE,QAAQ;;AAEpB,aAAa;EACX,SAAS,EAAE,IAAI;;;AAMX,kDAAkB;EAChB,aAAa,EAAE,MAAK;AACxB,oCAAoC;EAJtC,wBAAmB;IAKf,OAAO,EAAE,IAAI;IACb,eAAe,EAAE,UAAU;IAEzB,kDAAkB;MAChB,YAAY,EAAE,IAAI;MAClB,aAAa,EAAE,CAAC;IAEhB;;gEAAQ;MAGN,yBAAyB,EAAE,GAAG;MAC9B,sBAAsB,EAAE,GAAG;IAE7B;;+DAAQ;MAGN,0BAA0B,EAAE,GAAG;MAC/B,uBAAuB,EAAE,GAAG;IAChC;;oDAAQ;MAGN,aAAa,EAAE,CAAC;MAChB;;;;iEAAQ;QAEN,OAAO,EAAE,CAAC;MACZ;;;;;;;;gEAAQ;QAIN,OAAO,EAAE,CAAC;QACV;;;;;;;;wEAAO;UACL,OAAO,EAAE,CAAC;IAChB,6CAAa;MACX,SAAS,EAAE,CAAC;IAChB,4CAAqB;MACnB,eAAe,EAAE,MAAM;IACzB,yCAAkB;MAChB,eAAe,EAAE,QAAQ;IAEzB,sDAAQ;MACN,SAAS,EAAE,CAAC;MACZ,WAAW,EAAE,CAAC",
+"mappings": "AACE,sBAAa;EACX,aAAa,EAAE,MAAM;EACrB,wBAAC;IACC,aAAa,EAAE,MAAK;EACtB,4BAAK;IACH,aAAa,EAAE,OAAM;IACrB,uCAAY;MACV,aAAa,EAAE,CAAC;IAClB,6CAAkB;MAChB,aAAa,EAAE,CAAC;EACpB,+BAAQ;IACN,MAAM,EAAE,OAAO;EACjB,kCAAW;IACT,SAAS,EAAE,MAAM;;AAEvB,WAAW;EACT,KAAK,EAAE,kBAAkB;EACzB,gBAAgB,EAAE,kBAAkB;EACpC,WAAW,EAAE,IAAI;;;AAInB,aAAa;EACX,WAAW,EAAE,SAAS;;AAExB,WAAW;EACT,UAAU,EAAE,IAAI;EAChB,aAAa,EAAE,IAAI;;AAErB,gBAAgB;EACd,MAAM,EAAE,YAAY;;AAEtB,iBAAiB;EACf,UAAU,EAAE,IAAI;EAChB,aAAa,EAAE,IAAI;;AAGnB,oCAAoC;EADtC,mBAAmB;IAEf,WAAW,EAAE,eAAe;IAC5B,YAAY,EAAE,eAAe;AAC/B,oCAAoC;EAJtC,mBAAmB;IAKf,WAAW,EAAE,gBAAgB;IAC7B,YAAY,EAAE,gBAAgB;;AAElC,mBAAmB;EACjB,UAAU,EAAE,IAAI;;AAElB,wBAA+D;EAC7D,MAAM,EAAE,CAAC;;AAEX,SAAS;EACP,MAAM,EAAE,iBAAkC;EAC1C,aAAa,EAAE,GAAG;EAClB,OAAO,EAAE,iBAAgB;EACzB,MAAM,EAAE,KAAK;;AAEf,oBAAoB;EAClB,MAAM,EAAE,iBAAiB;;AAE3B,OAAO;EACL,WAAW,EAAE,GAAG;EAChB,gBAAgB,EAAE,OAAO;EACzB,OAAO,EAAE,OAAM;EACf,WAAW,EAAE,MAAK;;AAEpB,YAAY;EACV,WAAW,EAAE,QAAQ;;AAEvB,aAAa;EACX,MAAM,EAAE,CAAC;EACT,IAAI,EAAE,gBAAa;EACnB,MAAM,EAAE,GAAG;EACX,KAAK,EAAE,CAAC;EACR,MAAM,EAAE,IAAI;EACZ,QAAQ,EAAE,MAAM;EAChB,OAAO,EAAE,CAAC;EACV,QAAQ,EAAE,QAAQ;;AAEpB,aAAa;EACX,SAAS,EAAE,IAAI;;;AAMX,kDAAkB;EAChB,aAAa,EAAE,MAAK;AACxB,oCAAoC;EAJtC,wBAAmB;IAKf,OAAO,EAAE,IAAI;IACb,eAAe,EAAE,UAAU;IAEzB,kDAAkB;MAChB,YAAY,EAAE,IAAI;MAClB,aAAa,EAAE,CAAC;IAEhB;;gEAAQ;MAGN,yBAAyB,EAAE,GAAG;MAC9B,sBAAsB,EAAE,GAAG;IAE7B;;+DAAQ;MAGN,0BAA0B,EAAE,GAAG;MAC/B,uBAAuB,EAAE,GAAG;IAChC;;oDAAQ;MAGN,aAAa,EAAE,CAAC;MAChB;;;;iEAAQ;QAEN,OAAO,EAAE,CAAC;MACZ;;;;;;;;gEAAQ;QAIN,OAAO,EAAE,CAAC;QACV;;;;;;;;wEAAO;UACL,OAAO,EAAE,CAAC;IAChB,6CAAa;MACX,SAAS,EAAE,CAAC;IAChB,4CAAqB;MACnB,eAAe,EAAE,MAAM;IACzB,yCAAkB;MAChB,eAAe,EAAE,QAAQ;IAEzB,sDAAQ;MACN,SAAS,EAAE,CAAC;MACZ,WAAW,EAAE,CAAC",
 "sources": ["../sass/search.sass"],
 "names": [],
 "file": "search.css"
diff --git a/search/static/sass/search.sass b/search/static/sass/search.sass
index 443aab61..788e6018 100644
--- a/search/static/sass/search.sass
+++ b/search/static/sass/search.sass
@@ -3,6 +3,12 @@
     margin-bottom: 1.25em
     p
       margin-bottom: .25em
+    .tags
+      margin-bottom: .25rem
+      &:last-child
+        margin-bottom: 0
+      &:not(:last-child)
+        margin-bottom: 0
     span.tag
       height: 1.25rem
     .list-title
diff --git a/search/templates/search/search-macros.html b/search/templates/search/search-macros.html
index 199cb7c0..2ee3cc4b 100644
--- a/search/templates/search/search-macros.html
+++ b/search/templates/search/search-macros.html
@@ -131,27 +131,33 @@
 {% for result in results %}
   <li class="arxiv-result">
     <div class="level is-marginless">
-      <p class="list-title level-left">
+      <div class="level-left">
+      <p class="list-title level-item">
         {%- set display_paper_id = result.paper_id if is_current(result) else result.paper_id_v -%}
         <a href="{{ external_url('browse', 'abstract', paper_id=display_paper_id) }}">arXiv:{{ display_paper_id }}</a>
         <span>&nbsp;{{ paper_format_links(result, external_url, display_paper_id) }}&nbsp;</span>
+        <div class="tags">
         <span class="tag is-small {% if result.highlight.primary_classification %}search-hit{% else %}is-link{% endif %} tooltip is-tooltip-top" data-tooltip="{{ result.primary_classification.category.name }}">{{ result.primary_classification.category.id }}</span>
         {% if result.secondary_classification %}
           {% for secondary in result.secondary_classification %}
-            <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="{{secondary.category.name}}">{{ secondary.category.id }}</span>
+            <span class="tag is-small {% if result.highlight.secondary_classification %}search-hit{% else %}is-grey{% endif %} tooltip is-tooltip-top" data-tooltip="{{secondary.category.name}}">{{ secondary.category.id }}</span>
           {% endfor %}
         {% endif -%}
+      </div>
       </p>
+      </div>
       {% if result.doi %}
-      <div class="field is-grouped is-grouped-multiline level-right">
-        {% for doi in result.doi %}
-        <div class="control">
-          <div class="tags has-addons">
-            <span class="tag is-dark is-size-7">doi</span>
-            <span class="tag is-light is-size-7"><a class="" href="https://doi.org/{{ doi }}">{{ doi }} <i class="fa fa-external-link" aria-hidden="true"></i></a></span>
+      <div class="level-right">
+        <div class="field is-grouped is-grouped-multiline level-item">
+          {% for doi in result.doi %}
+          <div class="control">
+            <div class="tags has-addons">
+              <span class="tag is-dark is-size-7">doi</span>
+              <span class="tag is-light is-size-7"><a class="" href="https://doi.org/{{ doi }}">{{ doi }} <i class="fa fa-external-link" aria-hidden="true"></i></a></span>
+            </div>
           </div>
+          {% endfor %}
         </div>
-        {% endfor %}
       </div>
       {% endif %}
     </div>

From 0a6be5280e8ae2132b6e1236485c8551363057bf Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Mon, 12 Nov 2018 09:35:12 -0500
Subject: [PATCH 34/77] fix for hit highlighting

---
 search/routes/ui.py                   | 1 -
 search/services/index/highlighting.py | 9 ++++++---
 search/services/index/results.py      | 4 +++-
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/search/routes/ui.py b/search/routes/ui.py
index 5776be84..efe8341d 100644
--- a/search/routes/ui.py
+++ b/search/routes/ui.py
@@ -75,7 +75,6 @@ def search(archives: Optional[List[str]] = None) -> Union[str, Response]:
     response, code, headers = simple.search(request.args, archives)
     logger.debug(f"controller returned code: {code}")
     if code == status.HTTP_200_OK:
-        logger.debug('OK! %s', response)
         return render_template("search/search.html", pagetitle="Search",
                                archives=archives, **response)
     elif (code == status.HTTP_301_MOVED_PERMANENTLY
diff --git a/search/services/index/highlighting.py b/search/services/index/highlighting.py
index 757cbdcc..ba9340d4 100644
--- a/search/services/index/highlighting.py
+++ b/search/services/index/highlighting.py
@@ -9,15 +9,18 @@
 """
 
 import re
-from typing import Any
+from typing import Any, Union
 
 from elasticsearch_dsl import Search, Q, SF
-from elasticsearch_dsl.response import Response
+from elasticsearch_dsl.response import Response, Hit
 import bleach
 from flask import escape
+from arxiv.base import logging
 
 from .util import TEXISM
 
+logger = logging.getLogger(__name__)
+
 HIGHLIGHT_TAG_OPEN = '<span class="search-hit mathjax">'
 HIGHLIGHT_TAG_CLOSE = '</span>'
 
@@ -148,7 +151,7 @@ def preview(value: str, fragment_size: int = 400,
 # def _highlight(value: str, pattern: )
 
 
-def add_highlighting(result: dict, raw: Response) -> dict:
+def add_highlighting(result: dict, raw: Union[Response, Hit]) -> dict:
     """
     Add hit highlighting to a search result.
 
diff --git a/search/services/index/results.py b/search/services/index/results.py
index bbc16606..29d068a0 100644
--- a/search/services/index/results.py
+++ b/search/services/index/results.py
@@ -89,6 +89,7 @@ def to_document(raw: Union[Hit, dict], highlight: bool = True) -> Document:
 
     if type(raw) is Response:
         result['score'] = raw.meta.score    # type: ignore
+
     if type(result.get('abstract')) is str and highlight:
         if 'preview' not in result:
             result['preview'] = {}
@@ -96,7 +97,8 @@ def to_document(raw: Union[Hit, dict], highlight: bool = True) -> Document:
         if result['preview']['abstract'].endswith('&hellip;'):
             result['truncated']['abstract'] = True
 
-    if highlight and type(raw) is Response:
+    logger.debug('highlight: %s, raw type: %s', highlight, type(raw))
+    if highlight and type(raw) in [Response, Hit]:
         result['highlight'] = {}
         logger.debug('%s: add highlighting to result',
                      raw.paper_id)  # type: ignore

From b2806daa67c1a7faf7cd912efa08b79c7e498667 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Mon, 12 Nov 2018 09:37:13 -0500
Subject: [PATCH 35/77] updated arxiv-base dep

---
 Pipfile                          | 2 +-
 Pipfile.lock                     | 6 +++---
 search/services/index/results.py | 1 -
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/Pipfile b/Pipfile
index 8f36bd36..cf0a738f 100644
--- a/Pipfile
+++ b/Pipfile
@@ -4,7 +4,7 @@ verify_ssl = true
 name = "pypi"
 
 [packages]
-arxiv-base = "==0.11.1rc6"
+arxiv-base = "==0.11.1"
 arxiv-auth = "==0.1.0rc14"
 boto = "==2.48.0"
 "boto3" = "==1.6.6"
diff --git a/Pipfile.lock b/Pipfile.lock
index def0b52b..7366e432 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "37d01fb3e6bdfcf42c7d9a70424093f1bd5226875976c54f6b7f5d0f03e65866"
+            "sha256": "ea338f54b6f9db4c8618dc9ec2c2ee91e6eede2e2e205d7da6811269afed9af0"
         },
         "pipfile-spec": 6,
         "requires": {},
@@ -23,10 +23,10 @@
         },
         "arxiv-base": {
             "hashes": [
-                "sha256:2c3eb57d1cab3af774b72b2ea0cd673d0e67d78f41dbbc11b6ee648c38bb92c9"
+                "sha256:159a2c54ce7f0d562c5df28318b92086df8155e7aa29f7586835b9aaab74b749"
             ],
             "index": "pypi",
-            "version": "==0.11.1rc6"
+            "version": "==0.11.1"
         },
         "bleach": {
             "hashes": [
diff --git a/search/services/index/results.py b/search/services/index/results.py
index 29d068a0..fcfab44f 100644
--- a/search/services/index/results.py
+++ b/search/services/index/results.py
@@ -97,7 +97,6 @@ def to_document(raw: Union[Hit, dict], highlight: bool = True) -> Document:
         if result['preview']['abstract'].endswith('&hellip;'):
             result['truncated']['abstract'] = True
 
-    logger.debug('highlight: %s, raw type: %s', highlight, type(raw))
     if highlight and type(raw) in [Response, Hit]:
         result['highlight'] = {}
         logger.debug('%s: add highlighting to result',

From 0a451584fda259419e2bd4fac2b7e2ce477551cc Mon Sep 17 00:00:00 2001
From: eaw82 <eaw82@cornell.edu>
Date: Mon, 12 Nov 2018 12:13:51 -0500
Subject: [PATCH 36/77] ARXIVNG-1349 change layout for search results to better
 align tags and DOI tags

---
 search/static/css/search.css               |  6 +-----
 search/static/css/search.css.map           |  2 +-
 search/static/sass/search.sass             |  6 +-----
 search/templates/search/search-macros.html | 20 +++++++-------------
 4 files changed, 10 insertions(+), 24 deletions(-)

diff --git a/search/static/css/search.css b/search/static/css/search.css
index d94214d1..4f1fba4f 100644
--- a/search/static/css/search.css
+++ b/search/static/css/search.css
@@ -3,11 +3,7 @@
   .content .arxiv-result p {
     margin-bottom: 0.25em; }
   .content .arxiv-result .tags {
-    margin-bottom: 0.25rem; }
-    .content .arxiv-result .tags:last-child {
-      margin-bottom: 0; }
-    .content .arxiv-result .tags:not(:last-child) {
-      margin-bottom: 0; }
+    margin-bottom: 0; }
   .content .arxiv-result span.tag {
     height: 1.25rem; }
   .content .arxiv-result .list-title {
diff --git a/search/static/css/search.css.map b/search/static/css/search.css.map
index 034b7d33..b74e7dcf 100644
--- a/search/static/css/search.css.map
+++ b/search/static/css/search.css.map
@@ -1,6 +1,6 @@
 {
 "version": 3,
-"mappings": "AACE,sBAAa;EACX,aAAa,EAAE,MAAM;EACrB,wBAAC;IACC,aAAa,EAAE,MAAK;EACtB,4BAAK;IACH,aAAa,EAAE,OAAM;IACrB,uCAAY;MACV,aAAa,EAAE,CAAC;IAClB,6CAAkB;MAChB,aAAa,EAAE,CAAC;EACpB,+BAAQ;IACN,MAAM,EAAE,OAAO;EACjB,kCAAW;IACT,SAAS,EAAE,MAAM;;AAEvB,WAAW;EACT,KAAK,EAAE,kBAAkB;EACzB,gBAAgB,EAAE,kBAAkB;EACpC,WAAW,EAAE,IAAI;;;AAInB,aAAa;EACX,WAAW,EAAE,SAAS;;AAExB,WAAW;EACT,UAAU,EAAE,IAAI;EAChB,aAAa,EAAE,IAAI;;AAErB,gBAAgB;EACd,MAAM,EAAE,YAAY;;AAEtB,iBAAiB;EACf,UAAU,EAAE,IAAI;EAChB,aAAa,EAAE,IAAI;;AAGnB,oCAAoC;EADtC,mBAAmB;IAEf,WAAW,EAAE,eAAe;IAC5B,YAAY,EAAE,eAAe;AAC/B,oCAAoC;EAJtC,mBAAmB;IAKf,WAAW,EAAE,gBAAgB;IAC7B,YAAY,EAAE,gBAAgB;;AAElC,mBAAmB;EACjB,UAAU,EAAE,IAAI;;AAElB,wBAA+D;EAC7D,MAAM,EAAE,CAAC;;AAEX,SAAS;EACP,MAAM,EAAE,iBAAkC;EAC1C,aAAa,EAAE,GAAG;EAClB,OAAO,EAAE,iBAAgB;EACzB,MAAM,EAAE,KAAK;;AAEf,oBAAoB;EAClB,MAAM,EAAE,iBAAiB;;AAE3B,OAAO;EACL,WAAW,EAAE,GAAG;EAChB,gBAAgB,EAAE,OAAO;EACzB,OAAO,EAAE,OAAM;EACf,WAAW,EAAE,MAAK;;AAEpB,YAAY;EACV,WAAW,EAAE,QAAQ;;AAEvB,aAAa;EACX,MAAM,EAAE,CAAC;EACT,IAAI,EAAE,gBAAa;EACnB,MAAM,EAAE,GAAG;EACX,KAAK,EAAE,CAAC;EACR,MAAM,EAAE,IAAI;EACZ,QAAQ,EAAE,MAAM;EAChB,OAAO,EAAE,CAAC;EACV,QAAQ,EAAE,QAAQ;;AAEpB,aAAa;EACX,SAAS,EAAE,IAAI;;;AAMX,kDAAkB;EAChB,aAAa,EAAE,MAAK;AACxB,oCAAoC;EAJtC,wBAAmB;IAKf,OAAO,EAAE,IAAI;IACb,eAAe,EAAE,UAAU;IAEzB,kDAAkB;MAChB,YAAY,EAAE,IAAI;MAClB,aAAa,EAAE,CAAC;IAEhB;;gEAAQ;MAGN,yBAAyB,EAAE,GAAG;MAC9B,sBAAsB,EAAE,GAAG;IAE7B;;+DAAQ;MAGN,0BAA0B,EAAE,GAAG;MAC/B,uBAAuB,EAAE,GAAG;IAChC;;oDAAQ;MAGN,aAAa,EAAE,CAAC;MAChB;;;;iEAAQ;QAEN,OAAO,EAAE,CAAC;MACZ;;;;;;;;gEAAQ;QAIN,OAAO,EAAE,CAAC;QACV;;;;;;;;wEAAO;UACL,OAAO,EAAE,CAAC;IAChB,6CAAa;MACX,SAAS,EAAE,CAAC;IAChB,4CAAqB;MACnB,eAAe,EAAE,MAAM;IACzB,yCAAkB;MAChB,eAAe,EAAE,QAAQ;IAEzB,sDAAQ;MACN,SAAS,EAAE,CAAC;MACZ,WAAW,EAAE,CAAC",
+"mappings": "AACE,sBAAa;EACX,aAAa,EAAE,MAAM;EACrB,wBAAC;IACC,aAAa,EAAE,MAAK;EACtB,4BAAK;IACH,aAAa,EAAE,CAAC;EAClB,+BAAQ;IACN,MAAM,EAAE,OAAO;EACjB,kCAAW;IACT,SAAS,EAAE,MAAM;;AAEvB,WAAW;EACT,KAAK,EAAE,kBAAkB;EACzB,gBAAgB,EAAE,kBAAkB;EACpC,WAAW,EAAE,IAAI;;;AAInB,aAAa;EACX,WAAW,EAAE,SAAS;;AAExB,WAAW;EACT,UAAU,EAAE,IAAI;EAChB,aAAa,EAAE,IAAI;;AAErB,gBAAgB;EACd,MAAM,EAAE,YAAY;;AAEtB,iBAAiB;EACf,UAAU,EAAE,IAAI;EAChB,aAAa,EAAE,IAAI;;AAGnB,oCAAoC;EADtC,mBAAmB;IAEf,WAAW,EAAE,eAAe;IAC5B,YAAY,EAAE,eAAe;AAC/B,oCAAoC;EAJtC,mBAAmB;IAKf,WAAW,EAAE,gBAAgB;IAC7B,YAAY,EAAE,gBAAgB;;AAElC,mBAAmB;EACjB,UAAU,EAAE,IAAI;;AAElB,wBAA+D;EAC7D,MAAM,EAAE,CAAC;;AAEX,SAAS;EACP,MAAM,EAAE,iBAAkC;EAC1C,aAAa,EAAE,GAAG;EAClB,OAAO,EAAE,iBAAgB;EACzB,MAAM,EAAE,KAAK;;AAEf,oBAAoB;EAClB,MAAM,EAAE,iBAAiB;;AAE3B,OAAO;EACL,WAAW,EAAE,GAAG;EAChB,gBAAgB,EAAE,OAAO;EACzB,OAAO,EAAE,OAAM;EACf,WAAW,EAAE,MAAK;;AAEpB,YAAY;EACV,WAAW,EAAE,QAAQ;;AAEvB,aAAa;EACX,MAAM,EAAE,CAAC;EACT,IAAI,EAAE,gBAAa;EACnB,MAAM,EAAE,GAAG;EACX,KAAK,EAAE,CAAC;EACR,MAAM,EAAE,IAAI;EACZ,QAAQ,EAAE,MAAM;EAChB,OAAO,EAAE,CAAC;EACV,QAAQ,EAAE,QAAQ;;AAEpB,aAAa;EACX,SAAS,EAAE,IAAI;;;AAMX,kDAAkB;EAChB,aAAa,EAAE,MAAK;AACxB,oCAAoC;EAJtC,wBAAmB;IAKf,OAAO,EAAE,IAAI;IACb,eAAe,EAAE,UAAU;IAEzB,kDAAkB;MAChB,YAAY,EAAE,IAAI;MAClB,aAAa,EAAE,CAAC;IAEhB;;gEAAQ;MAGN,yBAAyB,EAAE,GAAG;MAC9B,sBAAsB,EAAE,GAAG;IAE7B;;+DAAQ;MAGN,0BAA0B,EAAE,GAAG;MAC/B,uBAAuB,EAAE,GAAG;IAChC;;oDAAQ;MAGN,aAAa,EAAE,CAAC;MAChB;;;;iEAAQ;QAEN,OAAO,EAAE,CAAC;MACZ;;;;;;;;gEAAQ;QAIN,OAAO,EAAE,CAAC;QACV;;;;;;;;wEAAO;UACL,OAAO,EAAE,CAAC;IAChB,6CAAa;MACX,SAAS,EAAE,CAAC;IAChB,4CAAqB;MACnB,eAAe,EAAE,MAAM;IACzB,yCAAkB;MAChB,eAAe,EAAE,QAAQ;IAEzB,sDAAQ;MACN,SAAS,EAAE,CAAC;MACZ,WAAW,EAAE,CAAC",
 "sources": ["../sass/search.sass"],
 "names": [],
 "file": "search.css"
diff --git a/search/static/sass/search.sass b/search/static/sass/search.sass
index 788e6018..9402c9e5 100644
--- a/search/static/sass/search.sass
+++ b/search/static/sass/search.sass
@@ -4,11 +4,7 @@
     p
       margin-bottom: .25em
     .tags
-      margin-bottom: .25rem
-      &:last-child
-        margin-bottom: 0
-      &:not(:last-child)
-        margin-bottom: 0
+      margin-bottom: 0
     span.tag
       height: 1.25rem
     .list-title
diff --git a/search/templates/search/search-macros.html b/search/templates/search/search-macros.html
index 2ee3cc4b..0a3c1d1c 100644
--- a/search/templates/search/search-macros.html
+++ b/search/templates/search/search-macros.html
@@ -130,13 +130,13 @@
 
 {% for result in results %}
   <li class="arxiv-result">
-    <div class="level is-marginless">
-      <div class="level-left">
-      <p class="list-title level-item">
+    <div class="is-marginless">
+      <p class="list-title is-inline-block">
         {%- set display_paper_id = result.paper_id if is_current(result) else result.paper_id_v -%}
         <a href="{{ external_url('browse', 'abstract', paper_id=display_paper_id) }}">arXiv:{{ display_paper_id }}</a>
         <span>&nbsp;{{ paper_format_links(result, external_url, display_paper_id) }}&nbsp;</span>
-        <div class="tags">
+      </p>
+      <div class="tags is-inline-block">
         <span class="tag is-small {% if result.highlight.primary_classification %}search-hit{% else %}is-link{% endif %} tooltip is-tooltip-top" data-tooltip="{{ result.primary_classification.category.name }}">{{ result.primary_classification.category.id }}</span>
         {% if result.secondary_classification %}
           {% for secondary in result.secondary_classification %}
@@ -144,21 +144,15 @@
           {% endfor %}
         {% endif -%}
       </div>
-      </p>
-      </div>
       {% if result.doi %}
-      <div class="level-right">
-        <div class="field is-grouped is-grouped-multiline level-item">
-          {% for doi in result.doi %}
-          <div class="control">
+        {% for doi in result.doi %}
+          <div class="is-inline-block" style="margin-left: 0.5rem">
             <div class="tags has-addons">
               <span class="tag is-dark is-size-7">doi</span>
               <span class="tag is-light is-size-7"><a class="" href="https://doi.org/{{ doi }}">{{ doi }} <i class="fa fa-external-link" aria-hidden="true"></i></a></span>
             </div>
           </div>
-          {% endfor %}
-        </div>
-      </div>
+        {% endfor %}
       {% endif %}
     </div>
     {# Note: mathjax class should be applied to any element that requires MathJax processing #}

From b540ee907ce1ff667ad621b090eb2e20732acf83 Mon Sep 17 00:00:00 2001
From: eaw82 <eaw82@cornell.edu>
Date: Wed, 14 Nov 2018 09:31:44 -0500
Subject: [PATCH 37/77] ARXIVNG-1349 rearrange header in advanced search take 1

---
 search/templates/search/advanced_search.html | 29 ++++++++------------
 search/templates/search/base.html            |  8 +++++-
 2 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/search/templates/search/advanced_search.html b/search/templates/search/advanced_search.html
index 092abd9f..82f5383d 100644
--- a/search/templates/search/advanced_search.html
+++ b/search/templates/search/advanced_search.html
@@ -339,25 +339,18 @@
     {% if has_classic_format %}
       {{ search_macros.show_classic_author_search() }}
     {% endif %}
-    <div class="level">
-      <div class="level-left" style="max-width: 90%;"> <!-- width adjustment for long query strings -->
-        <div class="level-item">
-          <a class="button" href="{{ url_for('ui.advanced_search') }}">Start a new search</a>
-        </div>
-        <div class="level-item">
-          <a class="button is-link" href="{{ current_url_sans_parameters('advanced') }}">Refine This Query</a>
-        </div>
-        <div class="level-item" style="word-wrap:break-word; max-width: 70%;"><a href="{{ current_url_sans_parameters('advanced') }}">{{ query }}</a>
-        </div> <!-- styling here is also to fit long query strings without breaking layout -->
+
+    <div class="columns">
+      <div class="column is-two-thirds-tablet">
+        <p class="is-marginless">Query: <a href="{{ current_url_sans_parameters('advanced') }}">{{ query }}</a></p>
+        <a class="button is-link" href="{{ current_url_sans_parameters('advanced') }}">Refine query</a> <a class="button" href="{{ url_for('ui.advanced_search') }}">New search</a>
       </div>
-      <div class="level-right">
-        <div class="level-item">
-          {% if query %}
-          <a href="{{ url_with_params("ui.search", {}, {"order": query.order, "size": query.size}) }}">Simple Search</a>
-          {% else %}
-          <a href="{{ url_with_params("ui.search", {}, {"order": form.order.data, "size": form.size.data }) }}">Simple Search</a>
-          {% endif %}
-        </div>
+      <div class="column is-one-third-tablet is-hidden-mobile">
+        {% if query %}
+        <a href="{{ url_with_params("ui.search", {}, {"order": query.order, "size": query.size}) }}">Simple Search</a>
+        {% else %}
+        <a href="{{ url_with_params("ui.search", {}, {"order": form.order.data, "size": form.size.data }) }}">Simple Search</a>
+        {% endif %}
       </div>
     </div>
 
diff --git a/search/templates/search/base.html b/search/templates/search/base.html
index dece26ed..e7e421db 100644
--- a/search/templates/search/base.html
+++ b/search/templates/search/base.html
@@ -41,7 +41,8 @@
     <div class="level-left">
       <h1 class="title is-clearfix">{% block title %}Search{% endblock title %}</h1>
     </div>
-    <div class="level-right">
+    <div class="level-right is-hidden-mobile">
+      <!-- feedback for mobile is moved to footer -->
       <span class="help" style="display: inline-block;"><a href="{{ config.RELEASE_NOTES_URL }}">{{ config.RELEASE_NOTES_TEXT }}</a>&nbsp;&nbsp;</span>
       <button class="button is-small" id="feedback-button">Feedback?</button>
     </div>
@@ -50,5 +51,10 @@ <h1 class="title is-clearfix">{% block title %}Search{% endblock title %}</h1>
       {% block within_content %}
         Specific results here
       {% endblock within_content %}
+      <div class="is-hidden-tablet">
+        <!-- feedback for mobile only -->
+        <span class="help" style="display: inline-block;"><a href="{{ config.RELEASE_NOTES_URL }}">{{ config.RELEASE_NOTES_TEXT }}</a>&nbsp;&nbsp;</span>
+        <button class="button is-small" id="feedback-button">Feedback?</button>
+      </div>
     </content>
 {% endblock content %}

From f4f23334babeab04addfa8a54a192f873ca686f9 Mon Sep 17 00:00:00 2001
From: eaw82 <eaw82@cornell.edu>
Date: Thu, 15 Nov 2018 13:25:09 -0500
Subject: [PATCH 38/77] ARXIVNG-1349 tweak margins and layout for
 tablet/mobile

---
 search/templates/search/advanced_search.html | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/search/templates/search/advanced_search.html b/search/templates/search/advanced_search.html
index 82f5383d..928a490e 100644
--- a/search/templates/search/advanced_search.html
+++ b/search/templates/search/advanced_search.html
@@ -342,15 +342,19 @@
 
     <div class="columns">
       <div class="column is-two-thirds-tablet">
-        <p class="is-marginless">Query: <a href="{{ current_url_sans_parameters('advanced') }}">{{ query }}</a></p>
-        <a class="button is-link" href="{{ current_url_sans_parameters('advanced') }}">Refine query</a> <a class="button" href="{{ url_for('ui.advanced_search') }}">New search</a>
+        <p style="margin-bottom: .5em">Query: <a href="{{ current_url_sans_parameters('advanced') }}">{{ query }}</a></p>
+        <div class="buttons">
+          <a class="button is-link" href="{{ current_url_sans_parameters('advanced') }}">Refine query</a><a class="button" href="{{ url_for('ui.advanced_search') }}">New search</a>
+        </div>
       </div>
       <div class="column is-one-third-tablet is-hidden-mobile">
-        {% if query %}
-        <a href="{{ url_with_params("ui.search", {}, {"order": query.order, "size": query.size}) }}">Simple Search</a>
-        {% else %}
-        <a href="{{ url_with_params("ui.search", {}, {"order": form.order.data, "size": form.size.data }) }}">Simple Search</a>
-        {% endif %}
+        <p class="has-text-right" style="margin-top: 1em">
+          {% if query %}
+          <a href="{{ url_with_params("ui.search", {}, {"order": query.order, "size": query.size}) }}">Simple Search</a>
+          {% else %}
+          <a href="{{ url_with_params("ui.search", {}, {"order": form.order.data, "size": form.size.data }) }}">Simple Search</a>
+          {% endif %}
+        </p>
       </div>
     </div>
 

From 9d36f62b2f109132cae5e857d7175aca1eb34ca3 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Mon, 3 Dec 2018 12:56:00 -0500
Subject: [PATCH 39/77] ARXIVNG-1363 upgraded requests version; ARXIVNG-1362
 added custom user agent

---
 =2.20.0                     | 11 +++++++++++
 search/services/metadata.py |  3 ++-
 2 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 =2.20.0

diff --git a/=2.20.0 b/=2.20.0
new file mode 100644
index 00000000..884d9061
--- /dev/null
+++ b/=2.20.0
@@ -0,0 +1,11 @@
+Installing requests…
+Requirement already satisfied: requests in /Users/brp53/.local/share/virtualenvs/arxiv-search-sJgjMA70/lib/python3.6/site-packages
+Requirement already satisfied: idna<2.8,>=2.5 in /Users/brp53/.local/share/virtualenvs/arxiv-search-sJgjMA70/lib/python3.6/site-packages (from requests)
+Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /Users/brp53/.local/share/virtualenvs/arxiv-search-sJgjMA70/lib/python3.6/site-packages (from requests)
+Requirement already satisfied: urllib3<1.25,>=1.21.1 in /Users/brp53/.local/share/virtualenvs/arxiv-search-sJgjMA70/lib/python3.6/site-packages (from requests)
+Requirement already satisfied: certifi>=2017.4.17 in /Users/brp53/.local/share/virtualenvs/arxiv-search-sJgjMA70/lib/python3.6/site-packages (from requests)
+
+Adding requests to Pipfile's [packages]…
+Installing dependencies from Pipfile.lock (d9f678)…
+To activate this project's virtualenv, run the following:
+ $ pipenv shell
diff --git a/search/services/metadata.py b/search/services/metadata.py
index 0c518aec..4de6bfb9 100644
--- a/search/services/metadata.py
+++ b/search/services/metadata.py
@@ -114,7 +114,8 @@ def retrieve(self, document_id: str) -> DocMeta:
                 f'{document_id}: retrieve metadata from {target} with SSL'
                 f' verify {self._verify_cert}'
             )
-            response = requests.get(target, verify=self._verify_cert)
+            response = requests.get(target, verify=self._verify_cert,
+                                    headers={'User-Agent': 'arXiv/system'})
         except requests.exceptions.SSLError as e:
             logger.error('SSLError: %s', e)
             raise SecurityException('SSL failed: %s' % e) from e

From 5b0bc3cfaebd9d2ce408a5ce67b5c5775ba28ccb Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Mon, 3 Dec 2018 12:59:22 -0500
Subject: [PATCH 40/77] minor

---
 =2.20.0      | 11 --------
 Pipfile      |  2 +-
 Pipfile.lock | 77 ++++++++++++++++++++++++----------------------------
 3 files changed, 36 insertions(+), 54 deletions(-)
 delete mode 100644 =2.20.0

diff --git a/=2.20.0 b/=2.20.0
deleted file mode 100644
index 884d9061..00000000
--- a/=2.20.0
+++ /dev/null
@@ -1,11 +0,0 @@
-Installing requests…
-Requirement already satisfied: requests in /Users/brp53/.local/share/virtualenvs/arxiv-search-sJgjMA70/lib/python3.6/site-packages
-Requirement already satisfied: idna<2.8,>=2.5 in /Users/brp53/.local/share/virtualenvs/arxiv-search-sJgjMA70/lib/python3.6/site-packages (from requests)
-Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /Users/brp53/.local/share/virtualenvs/arxiv-search-sJgjMA70/lib/python3.6/site-packages (from requests)
-Requirement already satisfied: urllib3<1.25,>=1.21.1 in /Users/brp53/.local/share/virtualenvs/arxiv-search-sJgjMA70/lib/python3.6/site-packages (from requests)
-Requirement already satisfied: certifi>=2017.4.17 in /Users/brp53/.local/share/virtualenvs/arxiv-search-sJgjMA70/lib/python3.6/site-packages (from requests)
-
-Adding requests to Pipfile's [packages]…
-Installing dependencies from Pipfile.lock (d9f678)…
-To activate this project's virtualenv, run the following:
- $ pipenv shell
diff --git a/Pipfile b/Pipfile
index fe3ff4c6..c739b241 100644
--- a/Pipfile
+++ b/Pipfile
@@ -38,7 +38,7 @@ pyflakes = "==1.6.0"
 pylama = "==7.4.3"
 python-dateutil = "==2.6.1"
 pytz = "==2017.3"
-requests = "==2.18.4"
+requests = "==2.20.0"
 "s3transfer" = "==0.1.13"
 snowballstemmer = "==1.2.1"
 thrift = "==0.11.0"
diff --git a/Pipfile.lock b/Pipfile.lock
index 1ea189d1..53044ac8 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "d1b1281c8c786c7f3ca24513df4fea25993b9b4bb08fcf0c66586d6455701ac4"
+            "sha256": "6dd7ab826af5c93582df0c39d70ab3c1fea38100ac5d0039fc204f166a93227f"
         },
         "pipfile-spec": 6,
         "requires": {},
@@ -30,11 +30,11 @@
         },
         "bleach": {
             "hashes": [
-                "sha256:0ee95f6167129859c5dce9b1ca291ebdb5d8cd7e382ca0e237dfd0dad63f63d8",
-                "sha256:24754b9a7d530bf30ce7cbc805bc6cce785660b4a10ff3a43633728438c105ab"
+                "sha256:48d39675b80a75f6d1c3bdbffec791cf0bbbab665cf01e20da701c77de278718",
+                "sha256:73d26f018af5d5adcdabf5c1c974add4361a9c76af215fe32fdec8a6fc5fb9b9"
             ],
             "index": "pypi",
-            "version": "==2.1.4"
+            "version": "==3.0.2"
         },
         "boto": {
             "hashes": [
@@ -180,13 +180,6 @@
             "index": "pypi",
             "version": "==0.3.3"
         },
-        "html5lib": {
-            "hashes": [
-                "sha256:20b159aa3badc9d5ee8f5c647e5efd02ed2a66ab8d354930bd9ff139fc1dc0a3",
-                "sha256:66cb0dcfdbbc4f9c3ba1a63fdb511ffdbd4f513b2b6d81b80cd26ce6b3fb3736"
-            ],
-            "version": "==1.0.1"
-        },
         "idna": {
             "hashes": [
                 "sha256:2c6a5de3089009e3da7c5dde64a141dbc8551d5b7f6cf4ed7c2568d0cc520a8f",
@@ -371,10 +364,10 @@
         },
         "pyjwt": {
             "hashes": [
-                "sha256:30b1380ff43b55441283cc2b2676b755cca45693ae3097325dea01f3d110628c",
-                "sha256:4ee413b357d53fd3fb44704577afac88e72e878716116270d722723d65b42176"
+                "sha256:00414bfef802aaecd8cc0d5258b6cb87bd8f553c2986c2c5f29b19dd5633aeb7",
+                "sha256:ddec8409c57e9d371c6006e388f91daf3b0b43bdf9fcbf99451fb7cf5ce0a86d"
             ],
-            "version": "==1.6.4"
+            "version": "==1.7.0"
         },
         "pylama": {
             "hashes": [
@@ -416,17 +409,17 @@
         },
         "redis-py-cluster": {
             "hashes": [
-                "sha256:e7349b1f2487d2c763088d81162e4a2ed91adee975c296bdba2ed96fd7450b36"
+                "sha256:7db54b1de60bd34da3806676b112f07fc9afae556d8260ac02c3335d574ee42c"
             ],
-            "version": "==1.3.5"
+            "version": "==1.3.6"
         },
         "requests": {
             "hashes": [
-                "sha256:6a1b267aa90cac58ac3a765d067950e7dbbf75b1da07e895d1f594193a40a38b",
-                "sha256:9c443e7324ba5b85070c4a818ade28bfabedf16ea10206da1132edaa6dda237e"
+                "sha256:99dcfdaaeb17caf6e526f32b6a7b780461512ab3f1d992187801694cba42770c",
+                "sha256:a84b8c9ab6239b578f22d1c21d51b696dcfe004032bb80ea832398d6909d7279"
             ],
             "index": "pypi",
-            "version": "==2.18.4"
+            "version": "==2.20.0"
         },
         "s3transfer": {
             "hashes": [
@@ -453,9 +446,9 @@
         },
         "sqlalchemy": {
             "hashes": [
-                "sha256:c5951d9ef1d5404ed04bae5a16b60a0779087378928f997a294d1229c6ca4d3e"
+                "sha256:9de7c7dabcf06319becdb7e15099c44e5e34ba7062f9ba10bc00e562f5db3d04"
             ],
-            "version": "==1.2.12"
+            "version": "==1.2.14"
         },
         "thrift": {
             "hashes": [
@@ -541,10 +534,10 @@
     "develop": {
         "alabaster": {
             "hashes": [
-                "sha256:674bb3bab080f598371f4443c5008cbfeb1a5e622dd312395d2d82af2c54c456",
-                "sha256:b63b1f4dc77c074d386752ec4a8a7517600f6c0db8cd42980cae17ab7b3275d7"
+                "sha256:446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359",
+                "sha256:a661d72d58e6ea8a57f7a86e37d86716863ee5e92788398526d58b26a4e4dc02"
             ],
-            "version": "==0.7.11"
+            "version": "==0.7.12"
         },
         "babel": {
             "hashes": [
@@ -617,11 +610,11 @@
         },
         "coveralls": {
             "hashes": [
-                "sha256:9dee67e78ec17b36c52b778247762851c8e19a893c9a14e921a2fc37f05fac22",
-                "sha256:aec5a1f5e34224b9089664a1b62217732381c7de361b6ed1b3c394d7187b352a"
+                "sha256:ab638e88d38916a6cedbf80a9cd8992d5fa55c77ab755e262e00b36792b7cd6d",
+                "sha256:b2388747e2529fa4c669fb1e3e2756e4e07b6ee56c7d9fce05f35ccccc913aa0"
             ],
             "index": "pypi",
-            "version": "==1.5.0"
+            "version": "==1.5.1"
         },
         "docopt": {
             "hashes": [
@@ -677,17 +670,17 @@
         },
         "pygments": {
             "hashes": [
-                "sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d",
-                "sha256:dbae1046def0efb574852fab9e90209b23f556367b5a320c0bcb871c77c3e8cc"
+                "sha256:6301ecb0997a52d2d31385e62d0a4a4cf18d2f2da7054a5ddad5c366cd39cee7",
+                "sha256:82666aac15622bd7bb685a4ee7f6625dd716da3ef7473620c192c0168aae64fc"
             ],
-            "version": "==2.2.0"
+            "version": "==2.3.0"
         },
         "pyparsing": {
             "hashes": [
-                "sha256:905d8090c335314568b5faee0025b1829f27bb974604a5762a6cdef3a7dfc3b7",
-                "sha256:f493ee323be1e94929416b3585eefcc04943115cecbaaa35a8c86d1a2368af19"
+                "sha256:40856e74d4987de5d01761a22d1621ae1c7f8774585acae358aa5c5936c6c90b",
+                "sha256:f353aab21fd474459d97b709e527b5571314ee5f067441dc9f88e33eecd96592"
             ],
-            "version": "==2.2.1"
+            "version": "==2.3.0"
         },
         "pytz": {
             "hashes": [
@@ -706,11 +699,11 @@
         },
         "requests": {
             "hashes": [
-                "sha256:6a1b267aa90cac58ac3a765d067950e7dbbf75b1da07e895d1f594193a40a38b",
-                "sha256:9c443e7324ba5b85070c4a818ade28bfabedf16ea10206da1132edaa6dda237e"
+                "sha256:99dcfdaaeb17caf6e526f32b6a7b780461512ab3f1d992187801694cba42770c",
+                "sha256:a84b8c9ab6239b578f22d1c21d51b696dcfe004032bb80ea832398d6909d7279"
             ],
             "index": "pypi",
-            "version": "==2.18.4"
+            "version": "==2.20.0"
         },
         "six": {
             "hashes": [
@@ -729,19 +722,19 @@
         },
         "sphinx": {
             "hashes": [
-                "sha256:652eb8c566f18823a022bb4b6dbc868d366df332a11a0226b5bc3a798a479f17",
-                "sha256:d222626d8356de702431e813a05c68a35967e3d66c6cd1c2c89539bb179a7464"
+                "sha256:120732cbddb1b2364471c3d9f8bfd4b0c5b550862f99a65736c77f970b142aea",
+                "sha256:b348790776490894e0424101af9c8413f2a86831524bd55c5f379d3e3e12ca64"
             ],
             "index": "pypi",
-            "version": "==1.8.1"
+            "version": "==1.8.2"
         },
         "sphinx-autodoc-typehints": {
             "hashes": [
-                "sha256:1a9df6cb3ba72453ea4bfbe96ea887abc0d796b2ce9508c2189217a1bb69b366",
-                "sha256:46cc9e985ee6d8bbbd07fffd95b815c39a72df6afb600f59671f85f7340e7d0d"
+                "sha256:9f546fa18ee6bfb17f5cf937805d3c8afea48b976050db0bd14bb463eee97888",
+                "sha256:adfa4712e77c795522574ce644e084cded2f6e796a59d37e7f1cb98287687100"
             ],
             "index": "pypi",
-            "version": "==1.3.0"
+            "version": "==1.5.2"
         },
         "sphinxcontrib-websupport": {
             "hashes": [

From 53570869617120f6ef464cab41b9f7b9ac5f4e89 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Mon, 3 Dec 2018 13:12:02 -0500
Subject: [PATCH 41/77] upgraded Flask to 1.0.2

---
 Pipfile          |  4 ++--
 Pipfile.lock     | 14 +++++++-------
 search/config.py |  2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/Pipfile b/Pipfile
index c739b241..4fdee1c0 100644
--- a/Pipfile
+++ b/Pipfile
@@ -17,7 +17,7 @@ dataclasses = "==0.4"
 docutils = "==0.14"
 elasticsearch = "==6.2.0"
 elasticsearch-dsl = "==6.1.0"
-flask = "==0.12.2"
+flask = "==1.0.2"
 "flask-s3" = "==0.3.3"
 idna = "==2.6"
 ipaddress = "==1.0.19"
@@ -45,7 +45,7 @@ thrift = "==0.11.0"
 thrift-connector = "==0.23"
 typed-ast = "==1.1.0"
 "urllib3" = "==1.22"
-werkzeug = "==0.13"
+werkzeug = "==0.14.1"
 wtforms = "==2.1"
 bleach = "*"
 lxml = "*"
diff --git a/Pipfile.lock b/Pipfile.lock
index 53044ac8..31a0b01b 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "6dd7ab826af5c93582df0c39d70ab3c1fea38100ac5d0039fc204f166a93227f"
+            "sha256": "1250f4dc273330bb826877b641fe864cf1f0e97c4c8cefa635ce1f7f6b316c31"
         },
         "pipfile-spec": 6,
         "requires": {},
@@ -165,11 +165,11 @@
         },
         "flask": {
             "hashes": [
-                "sha256:0749df235e3ff61ac108f69ac178c9770caeaccad2509cb762ce1f65570a8856",
-                "sha256:49f44461237b69ecd901cc7ce66feea0319b9158743dd27a2899962ab214dac1"
+                "sha256:2271c0070dbcb5275fad4a82e29f23ab92682dc45f9dfbc22c02ba9b9322ce48",
+                "sha256:a080b744b7e345ccfcbc77954861cb05b3c63786e93f2b3875e0913d44b43f05"
             ],
             "index": "pypi",
-            "version": "==0.12.2"
+            "version": "==1.0.2"
         },
         "flask-s3": {
             "hashes": [
@@ -517,11 +517,11 @@
         },
         "werkzeug": {
             "hashes": [
-                "sha256:6246e5fc98a505824113fb6aca993d45ea284a2bcffdc2c65d0c538e53e4abd3",
-                "sha256:f3000aa146ce8a9da8ca3e978e0e931c2a58eb56c323a5efb6b4307f7832b549"
+                "sha256:c3fd7a7d41976d9f44db327260e263132466836cef6f91512889ed60ad26557c",
+                "sha256:d5da73735293558eb1651ee2fddc4d0dedcfa06538b8813a2e20011583c9e49b"
             ],
             "index": "pypi",
-            "version": "==0.13"
+            "version": "==0.14.1"
         },
         "wtforms": {
             "hashes": [
diff --git a/search/config.py b/search/config.py
index a1488c7b..acfb5141 100644
--- a/search/config.py
+++ b/search/config.py
@@ -57,7 +57,7 @@
 application context.
 """
 
-APPLICATION_ROOT = os.environ.get('APPLICATION_ROOT', None)
+APPLICATION_ROOT = os.environ.get('APPLICATION_ROOT', '/')
 """
 If the application does not occupy a whole domain or subdomain this can be set
 to the path where the application is configured to live. This is for session

From 7d06422abf7b61b9dec354357bde7ff2e23aaddc Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Mon, 3 Dec 2018 13:21:51 -0500
Subject: [PATCH 42/77] ARXIVNG-1357 upgraded secondary classification mapping
 to be consistent with primary

---
 mappings/DocumentMapping.json | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/mappings/DocumentMapping.json b/mappings/DocumentMapping.json
index dc718f70..675bce89 100644
--- a/mappings/DocumentMapping.json
+++ b/mappings/DocumentMapping.json
@@ -449,10 +449,14 @@
               "type": "object",
               "properties": {
                 "id": {
-                  "type": "keyword"
+                  "type": "keyword",
+                  "normalizer": "simple",
+                  "copy_to": ["combined"]
                 },
                 "name": {
-                  "type": "keyword"
+                  "type": "keyword",
+                  "normalizer": "simple",
+                  "copy_to": ["combined"]
                 }
               }
             },
@@ -460,10 +464,14 @@
               "type": "object",
               "properties": {
                 "id": {
-                  "type": "keyword"
+                  "type": "keyword",
+                  "normalizer": "simple",
+                  "copy_to": ["combined"]
                 },
                 "name": {
-                  "type": "keyword"
+                  "type": "keyword",
+                  "normalizer": "simple",
+                  "copy_to": ["combined"]
                 }
               }
             }

From 1ae8b35551bd41ed2ba379005e76c9367e82b9a9 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Mon, 3 Dec 2018 14:46:40 -0500
Subject: [PATCH 43/77] fixed hit highlighting for secondaries

---
 search/services/index/highlighting.py      |  3 +++
 search/services/index/prepare.py           | 22 ++++++++++++----------
 search/templates/search/search-macros.html |  2 +-
 3 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/search/services/index/highlighting.py b/search/services/index/highlighting.py
index ba9340d4..621fe9a2 100644
--- a/search/services/index/highlighting.py
+++ b/search/services/index/highlighting.py
@@ -188,6 +188,9 @@ def add_highlighting(result: dict, raw: Union[Response, Hit]) -> dict:
         if 'primary_classification' in field:
             field = 'primary_classification'
         if 'secondary_classification' in field:
+            value = value \
+                .split(HIGHLIGHT_TAG_OPEN, 1)[1] \
+                .split(HIGHLIGHT_TAG_CLOSE, 1)[0]
             field = 'secondary_classification'
 
         # Non-TeX searches may hit inside of TeXisms. Highlighting those
diff --git a/search/services/index/prepare.py b/search/services/index/prepare.py
index c7485488..ab297f4b 100644
--- a/search/services/index/prepare.py
+++ b/search/services/index/prepare.py
@@ -118,18 +118,20 @@ def _query_primary(term: str, operator: str = 'and') -> Q:
     # TODO: in a future version, we should consider changes to the mappings
     # to make this more straightforward.
     if operator == 'or':
-        return reduce(ior, [(
+        _q = reduce(ior, [(
             Q("match", **{"primary_classification__category__id": {"query": part, "operator": operator}})
             | Q("wildcard", **{"primary_classification.category.name": f"*{part}*"})
             | Q("match", **{"primary_classification__archive__id": {"query": part, "operator": operator}})
             | Q("wildcard", **{"primary_classification.archive.name": f"*{part}*"})
         ) for part in term.split()])
-    return (
-        Q("match", **{"primary_classification__category__id": {"query": term, "operator": operator}})
-        | Q("match", **{"primary_classification__category__name": {"query": term, "operator": operator}})
-        | Q("match", **{"primary_classification__archive__id": {"query": term, "operator": operator}})
-        | Q("match", **{"primary_classification__archive__name": {"query": term, "operator": operator}})
-    )
+    else:
+        _q = (
+            Q("match", **{"primary_classification__category__id": {"query": term, "operator": operator}})
+            | Q("match", **{"primary_classification__category__name": {"query": term, "operator": operator}})
+            | Q("match", **{"primary_classification__archive__id": {"query": term, "operator": operator}})
+            | Q("match", **{"primary_classification__archive__name": {"query": term, "operator": operator}})
+        )
+    return _q
 
 
 def _query_secondary(term: str, operator: str = 'and') -> Q:
@@ -239,7 +241,7 @@ def _query_all_fields(term: str) -> Q:
         _query_acm_class(term, operator='or'),
         _query_msc_class(term, operator='or'),
         _query_primary(term, operator='or'),
-        _query_secondary(term, operator='or')
+        _query_secondary(term, operator='or'),
     ]
 
     # If the whole query matches on a specific field, we should consider that
@@ -257,10 +259,10 @@ def _query_all_fields(term: str) -> Q:
         _query_report_num(term, operator='and'),
         _query_acm_class(term, operator='and'),
         _query_msc_class(term, operator='and'),
-        _query_primary(term, operator='and')
+        _query_primary(term, operator='and'),
+        _query_secondary(term, operator='and')
     ])
 
-
     # It is possible that the query includes a date-related term, which we
     # interpret as an announcement date of v1 of the paper. We currently
     # support both "standard" `yyyy` or `yyyy-MM`` formats as well as a
diff --git a/search/templates/search/search-macros.html b/search/templates/search/search-macros.html
index 0a3c1d1c..33efa915 100644
--- a/search/templates/search/search-macros.html
+++ b/search/templates/search/search-macros.html
@@ -140,7 +140,7 @@
         <span class="tag is-small {% if result.highlight.primary_classification %}search-hit{% else %}is-link{% endif %} tooltip is-tooltip-top" data-tooltip="{{ result.primary_classification.category.name }}">{{ result.primary_classification.category.id }}</span>
         {% if result.secondary_classification %}
           {% for secondary in result.secondary_classification %}
-            <span class="tag is-small {% if result.highlight.secondary_classification %}search-hit{% else %}is-grey{% endif %} tooltip is-tooltip-top" data-tooltip="{{secondary.category.name}}">{{ secondary.category.id }}</span>
+            <span class="tag is-small {% if result.highlight.secondary_classification == secondary.category.id %}search-hit{% else %}is-grey{% endif %} tooltip is-tooltip-top" data-tooltip="{{secondary.category.name}}">{{ secondary.category.id }}</span>
           {% endfor %}
         {% endif -%}
       </div>

From d1b74d06349063b7b60d7d169b933cc392ee6609 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Mon, 3 Dec 2018 18:18:13 -0500
Subject: [PATCH 44/77] highlighting fix for non-field hits on primary and
 secondary category

---
 search/services/index/highlighting.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/search/services/index/highlighting.py b/search/services/index/highlighting.py
index 621fe9a2..f59f0b24 100644
--- a/search/services/index/highlighting.py
+++ b/search/services/index/highlighting.py
@@ -71,9 +71,9 @@ def highlight(search: Search) -> Search:
     search = search.highlight('abstract.english', number_of_fragments=0)
 
     search = search.highlight('primary_classification*', type='plain',
-                              number_of_fragments=0)
+                              number_of_fragments=0, require_field_match=False)
     search = search.highlight('secondary_classification*', type='plain',
-                              number_of_fragments=0)
+                              number_of_fragments=0, require_field_match=False)
     return search
 
 
@@ -148,9 +148,6 @@ def preview(value: str, fragment_size: int = 400,
     return snippet
 
 
-# def _highlight(value: str, pattern: )
-
-
 def add_highlighting(result: dict, raw: Union[Response, Hit]) -> dict:
     """
     Add hit highlighting to a search result.
@@ -174,7 +171,6 @@ def add_highlighting(result: dict, raw: Union[Response, Hit]) -> dict:
     # ``meta.matched_queries`` contains a list of query ``_name``s that
     # matched. This is nice for non-string fields.
     matched_fields = getattr(raw.meta, 'matched_queries', [])
-
     # The values here will (almost) always be list-like. So we need to stitch
     # them together. Note that dir(None) won't return anything, so this block
     # is skipped if there are no highlights from ES.
@@ -184,7 +180,6 @@ def add_highlighting(result: dict, raw: Union[Response, Hit]) -> dict:
         value = getattr(highlighted_fields, field)
         if hasattr(value, '__iter__'):
             value = '&hellip;'.join(value)
-
         if 'primary_classification' in field:
             field = 'primary_classification'
         if 'secondary_classification' in field:

From 752b3c930740cf513a0a9d570d11a00ffe5feb52 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Tue, 18 Dec 2018 12:08:34 -0500
Subject: [PATCH 45/77] minor bugfix: Classification fields as str; v0.5 notes

---
 DECISIONS.md                         | 10 ++++++++++
 RELEASE_NOTES.md                     | 18 ++++++++++++++++++
 search/domain/base.py                |  8 ++++----
 search/services/index/tests/tests.py |  6 +++---
 4 files changed, 35 insertions(+), 7 deletions(-)
 create mode 100644 RELEASE_NOTES.md

diff --git a/DECISIONS.md b/DECISIONS.md
index 4e619c9c..f8056ac8 100644
--- a/DECISIONS.md
+++ b/DECISIONS.md
@@ -1,5 +1,7 @@
 # Decision log
 
+## Initial design decisions - v0.1-0.4
+
 1. To get started quickly, we will start with an AWS Elasticsearch managed
    cluster running in the cloud. We may wish to run our own cluster in the
    future.
@@ -32,3 +34,11 @@
    results, and we are only seeking feature-parity with the classic system.
    When we address hit highlighting, we can show matching author names deep in
    author list to provide visual feedback to the user.
+
+## Subsequent decisions
+
+- 2018-12-18. Removing cross-list functionality in v0.1 was a regression. Users
+  expect to be able to search by cross-list category just like primary
+  category. We decided to include cross-list/secondary category in the
+  all-fields search, and also add a cross-list option to the advanced search
+  interface.
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
new file mode 100644
index 00000000..d86050c9
--- /dev/null
+++ b/RELEASE_NOTES.md
@@ -0,0 +1,18 @@
+# Release notes
+
+## Version 0.5 - The cross-list release.
+
+### Bug
+- [ARXIVNG-1349] - Layout of category tags and DOI is really wonky in search results
+
+
+### New Feature
+- [ARXIVNG-1277] - Search result entries should display secondary categories if present
+- [ARXIVNG-1278] - "All fields" queries should search secondary categories
+- [ARXIVNG-1347] - Search by secondary/cross-list
+- [ARXIVNG-1357] - Update document mapping for secondaries to be consistent with mapping for primary category
+- [ARXIVNG-1362] - Search indexing agent should identify itself using a custom User-Agent
+
+### Task
+- [ARXIVNG-1048] - Update accessibility notice in footer
+- [ARXIVNG-1348] - Update requests dependency to 2.20.0 or greater
diff --git a/search/domain/base.py b/search/domain/base.py
index 7ba8e627..d5a916c9 100644
--- a/search/domain/base.py
+++ b/search/domain/base.py
@@ -126,14 +126,14 @@ def __str__(self) -> str:
 class Classification:
     """Represents an arXiv classification for a paper."""
 
-    group: Optional[dict] = None
-    archive: Optional[dict] = None
-    category: Optional[dict] = None
+    group: Optional[str] = None
+    archive: Optional[str] = None
+    category: Optional[str] = None
 
     def __str__(self) -> str:
         """Build a string representation, for use in rendering."""
         return ":".join(
-            [p['id'] for p in [self.group, self.archive, self.category] if p]
+            [p for p in [self.group, self.archive, self.category] if p]
         )
 
 
diff --git a/search/services/index/tests/tests.py b/search/services/index/tests/tests.py
index 85bc1f45..bf13524e 100644
--- a/search/services/index/tests/tests.py
+++ b/search/services/index/tests/tests.py
@@ -52,9 +52,9 @@ def test_advanced_query(self, mock_Elasticsearch, mock_Search):
             ),
             primary_classification=ClassificationList([
                 Classification(
-                    group={'id': 'physics'},
-                    archive={'id': 'physics'},
-                    category={'id': 'hep-th'}
+                    group='physics',
+                    archive='physics',
+                    category='hep-th'
                 )
             ]),
             terms=FieldedSearchList([

From 8711be8c0314f02baa9b1a20e08158e442978483 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Tue, 18 Dec 2018 12:33:40 -0500
Subject: [PATCH 46/77] better fix for category bug

---
 search/controllers/advanced/__init__.py |  8 ++++----
 search/controllers/advanced/tests.py    | 10 ++++++----
 search/controllers/api/__init__.py      |  4 +---
 search/controllers/simple/__init__.py   |  2 +-
 search/domain/base.py                   |  8 ++++----
 search/services/index/advanced.py       |  4 ++--
 search/services/index/prepare.py        |  6 +++---
 search/services/index/tests/tests.py    |  6 +++---
 8 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/search/controllers/advanced/__init__.py b/search/controllers/advanced/__init__.py
index f26a92cf..18f5a851 100644
--- a/search/controllers/advanced/__init__.py
+++ b/search/controllers/advanced/__init__.py
@@ -200,18 +200,18 @@ def _update_query_with_classification(q: AdvancedQuery, data: MultiDict) \
             # Fix for these typing issues is coming soon!
             #  See: https://github.com/python/mypy/pull/4397
             q.primary_classification.append(
-                Classification(archive=archive)  # type: ignore
+                Classification(archive={'id': archive})  # type: ignore
             )
     if data.get('physics') and 'physics_archives' in data:
         if 'all' in data['physics_archives']:
             q.primary_classification.append(
-                Classification(group='grp_physics')  # type: ignore
+                Classification(group={'id': 'grp_physics'})  # type: ignore
             )
         else:
             q.primary_classification.append(
                 Classification(     # type: ignore
-                    group='grp_physics',
-                    archive=data['physics_archives']
+                    group={'id': 'grp_physics'},
+                    archive={'id': data['physics_archives']}
                 )
             )
     return q
diff --git a/search/controllers/advanced/tests.py b/search/controllers/advanced/tests.py
index d547cad7..93c347d9 100644
--- a/search/controllers/advanced/tests.py
+++ b/search/controllers/advanced/tests.py
@@ -395,7 +395,7 @@ def test_classification_is_selected(self):
         self.assertIsInstance(q.primary_classification, list)
         self.assertEqual(len(q.primary_classification), 1)
         self.assertIsInstance(q.primary_classification[0], Classification)
-        self.assertEqual(q.primary_classification[0].archive, 'cs')
+        self.assertEqual(q.primary_classification[0].archive['id'], 'cs')
 
     def test_multiple_classifications_are_selected(self):
         """Selected classifications are added to the query."""
@@ -416,7 +416,8 @@ def test_physics_is_selected_all_archives(self):
         self.assertEqual(len(q.primary_classification), 1)
         self.assertIsInstance(q.primary_classification[0], Classification)
         self.assertIsNone(q.primary_classification[0].archive)
-        self.assertEqual(q.primary_classification[0].group, 'grp_physics')
+        self.assertEqual(q.primary_classification[0].group['id'],
+                         'grp_physics')
 
     def test_physics_is_selected_specific_archive(self):
         """The physic group and specified archive are added to the query."""
@@ -426,8 +427,9 @@ def test_physics_is_selected_specific_archive(self):
         self.assertIsInstance(q.primary_classification, list)
         self.assertEqual(len(q.primary_classification), 1)
         self.assertIsInstance(q.primary_classification[0], Classification)
-        self.assertEqual(q.primary_classification[0].archive, 'hep-ex')
-        self.assertEqual(q.primary_classification[0].group, 'grp_physics')
+        self.assertEqual(q.primary_classification[0].archive['id'], 'hep-ex')
+        self.assertEqual(q.primary_classification[0].group['id'],
+                         'grp_physics')
 
     def test_physics_is_selected_specific_archive_plus_other_groups(self):
         """The physics group and specified archive are added to the query."""
diff --git a/search/controllers/api/__init__.py b/search/controllers/api/__init__.py
index 69a47a14..a5220f02 100644
--- a/search/controllers/api/__init__.py
+++ b/search/controllers/api/__init__.py
@@ -150,9 +150,7 @@ def _get_classifications(params: MultiDict) -> Optional[ClassificationList]:
                 'field': 'primary_classification',
                 'reason': 'not a valid archive'
             })
-        classifications.append(
-            Classification(archive=value)   # type: ignore
-        )
+        classifications.append(Classification(archive={'id': value}))
     if len(classifications) == 0:
         return None
     return classifications
diff --git a/search/controllers/simple/__init__.py b/search/controllers/simple/__init__.py
index 5bf6e6c0..065f38bf 100644
--- a/search/controllers/simple/__init__.py
+++ b/search/controllers/simple/__init__.py
@@ -248,7 +248,7 @@ def _update_with_archives(q: SimpleQuery, archives: List[str]) -> SimpleQuery:
     """
     logger.debug('Search within %s', archives)
     q.primary_classification = ClassificationList([
-        Classification(archive=archive) for archive in archives  # type: ignore
+        Classification(archive={'id': 'archive'}) for archive in archives
     ])
     return q
 
diff --git a/search/domain/base.py b/search/domain/base.py
index d5a916c9..7ba8e627 100644
--- a/search/domain/base.py
+++ b/search/domain/base.py
@@ -126,14 +126,14 @@ def __str__(self) -> str:
 class Classification:
     """Represents an arXiv classification for a paper."""
 
-    group: Optional[str] = None
-    archive: Optional[str] = None
-    category: Optional[str] = None
+    group: Optional[dict] = None
+    archive: Optional[dict] = None
+    category: Optional[dict] = None
 
     def __str__(self) -> str:
         """Build a string representation, for use in rendering."""
         return ":".join(
-            [p for p in [self.group, self.archive, self.category] if p]
+            [p['id'] for p in [self.group, self.archive, self.category] if p]
         )
 
 
diff --git a/search/services/index/advanced.py b/search/services/index/advanced.py
index bc4e2804..8b744052 100644
--- a/search/services/index/advanced.py
+++ b/search/services/index/advanced.py
@@ -57,10 +57,10 @@ def _classification(field: str, classification: Classification) -> Match:
     query = Q()
     if classification.group:
         field_name = '%s__group__id' % field
-        query &= Q('match', **{field_name: classification.group})
+        query &= Q('match', **{field_name: classification.group['id']})
     if classification.archive:
         field_name = '%s__archive__id' % field
-        query &= Q('match', **{field_name: classification.archive})
+        query &= Q('match', **{field_name: classification.archive['id']})
     return query
 
 
diff --git a/search/services/index/prepare.py b/search/services/index/prepare.py
index d50fc457..9535e319 100644
--- a/search/services/index/prepare.py
+++ b/search/services/index/prepare.py
@@ -342,7 +342,7 @@ def _to_q(classification: Classification) -> Q:
             _qs.append(
                 Q("match", **{
                     "primary_classification__group__id": {
-                        "query": classification.group
+                        "query": classification.group['id']
                     }
                 })
             )
@@ -350,7 +350,7 @@ def _to_q(classification: Classification) -> Q:
             _qs.append(
                 Q("match", **{
                     "primary_classification__archive__id": {
-                        "query": classification.archive
+                        "query": classification.archive['id']
                     }
                 })
             )
@@ -358,7 +358,7 @@ def _to_q(classification: Classification) -> Q:
             _qs.append(
                 Q("match", **{
                     "primary_classification__category__id": {
-                        "query": classification.category
+                        "query": classification.category['id']
                     }
                 })
             )
diff --git a/search/services/index/tests/tests.py b/search/services/index/tests/tests.py
index bf13524e..85bc1f45 100644
--- a/search/services/index/tests/tests.py
+++ b/search/services/index/tests/tests.py
@@ -52,9 +52,9 @@ def test_advanced_query(self, mock_Elasticsearch, mock_Search):
             ),
             primary_classification=ClassificationList([
                 Classification(
-                    group='physics',
-                    archive='physics',
-                    category='hep-th'
+                    group={'id': 'physics'},
+                    archive={'id': 'physics'},
+                    category={'id': 'hep-th'}
                 )
             ]),
             terms=FieldedSearchList([

From 7362810f9acb98ba98d899104081d4c4306fe9c0 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Tue, 18 Dec 2018 12:46:48 -0500
Subject: [PATCH 47/77] fixed typing errors

---
 search/controllers/api/__init__.py    | 2 +-
 search/controllers/simple/__init__.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/search/controllers/api/__init__.py b/search/controllers/api/__init__.py
index a5220f02..6275a505 100644
--- a/search/controllers/api/__init__.py
+++ b/search/controllers/api/__init__.py
@@ -150,7 +150,7 @@ def _get_classifications(params: MultiDict) -> Optional[ClassificationList]:
                 'field': 'primary_classification',
                 'reason': 'not a valid archive'
             })
-        classifications.append(Classification(archive={'id': value}))
+        classifications.append(Classification(archive={'id': value}))   # type: ignore
     if len(classifications) == 0:
         return None
     return classifications
diff --git a/search/controllers/simple/__init__.py b/search/controllers/simple/__init__.py
index 065f38bf..5443aee0 100644
--- a/search/controllers/simple/__init__.py
+++ b/search/controllers/simple/__init__.py
@@ -248,7 +248,8 @@ def _update_with_archives(q: SimpleQuery, archives: List[str]) -> SimpleQuery:
     """
     logger.debug('Search within %s', archives)
     q.primary_classification = ClassificationList([
-        Classification(archive={'id': 'archive'}) for archive in archives
+        Classification(archive={'id': 'archive'})    # type: ignore
+        for archive in archives
     ])
     return q
 

From 9bf64522c79f9fcba34d028fcc0700d64c6de2b3 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Tue, 18 Dec 2018 16:57:11 -0500
Subject: [PATCH 48/77] fixed bug: guard missing inner_hits in highlighting

---
 Pipfile                               |  2 +-
 Pipfile.lock                          |  6 ++---
 search/domain/base.py                 | 35 ++++++++++++++++++++++++---
 search/services/index/highlighting.py | 11 ++++++---
 4 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/Pipfile b/Pipfile
index 47f6565e..571da1cc 100644
--- a/Pipfile
+++ b/Pipfile
@@ -4,7 +4,7 @@ verify_ssl = true
 name = "pypi"
 
 [packages]
-arxiv-base = "==0.11.1"
+arxiv-base = "==0.12.1rc2"
 arxiv-auth = "==0.1.0rc14"
 boto = "==2.48.0"
 "boto3" = "==1.6.6"
diff --git a/Pipfile.lock b/Pipfile.lock
index 42fd6c91..70b87fff 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "dedd1ed309fd7aaaef88d597bca8ee694a362ae7ec05b0bfe2e60619cab79fbf"
+            "sha256": "59b8e0253b404fbf5bc25a8b9602e3ab61a9aabd1baa2a20768d000b7dda49bd"
         },
         "pipfile-spec": 6,
         "requires": {},
@@ -23,10 +23,10 @@
         },
         "arxiv-base": {
             "hashes": [
-                "sha256:159a2c54ce7f0d562c5df28318b92086df8155e7aa29f7586835b9aaab74b749"
+                "sha256:9896b4f54d4a5e20c5f7ab7e8c65aa022781f8eba149b9730896fe22a44e0535"
             ],
             "index": "pypi",
-            "version": "==0.11.1"
+            "version": "==0.12.1rc2"
         },
         "bleach": {
             "hashes": [
diff --git a/search/domain/base.py b/search/domain/base.py
index 7ba8e627..7149cc7c 100644
--- a/search/domain/base.py
+++ b/search/domain/base.py
@@ -6,6 +6,8 @@
 from pytz import timezone
 import re
 
+from arxiv import taxonomy
+
 from dataclasses import dataclass, field
 from dataclasses import asdict as _asdict
 
@@ -130,11 +132,38 @@ class Classification:
     archive: Optional[dict] = None
     category: Optional[dict] = None
 
+    @property
+    def group_display(self) -> str:
+        if "name" in self.group:
+            return self.group["name"]
+        return taxonomy.get_group_display(self.group["id"])
+
+    @property
+    def archive_display(self) -> str:
+        if "name" in self.archive:
+            return self.archive["name"]
+        return taxonomy.get_archive_display(self.archive["id"])
+
+    @property
+    def category_display(self) -> str:
+        if "name" in self.category:
+            return self.category["name"]
+        return taxonomy.get_category_display(self.category["id"])
+
     def __str__(self) -> str:
         """Build a string representation, for use in rendering."""
-        return ":".join(
-            [p['id'] for p in [self.group, self.archive, self.category] if p]
-        )
+        s = ""
+        if self.group:
+            s += self.group_display
+        if self.archive:
+            if s:
+                s += " :: "
+            s += self.archive_display
+        if self.category:
+            if s:
+                s += " :: "
+            s += self.category_display
+        return s
 
 
 class ClassificationList(list):
diff --git a/search/services/index/highlighting.py b/search/services/index/highlighting.py
index fc085414..0c30226c 100644
--- a/search/services/index/highlighting.py
+++ b/search/services/index/highlighting.py
@@ -169,7 +169,7 @@ def add_highlighting(result: dict, raw: Union[Response, Hit]) -> dict:
 
     # These are from hits within child documents, e.g.
     # secondary_classification.
-    inner_hits = getattr(raw.meta, 'inner_hits', [])
+    inner_hits = getattr(raw.meta, 'inner_hits', None)
 
     # The values here will (almost) always be list-like. So we need to stitch
     # them together. Note that dir(None) won't return anything, so this block
@@ -207,9 +207,12 @@ def add_highlighting(result: dict, raw: Union[Response, Hit]) -> dict:
         if field not in result['highlight']:
             result['match'][field] = True
 
-    result['match']['secondary_classification'] = [
-        ih.category.id for ih in inner_hits.secondary_classification
-    ]
+    # We're using inner_hits to see which category in particular responded to
+    # the query.
+    if hasattr(inner_hits, 'secondary_classification'):
+        result['match']['secondary_classification'] = [
+            ih.category.id for ih in inner_hits.secondary_classification
+        ]
 
     # We just want to know whether there was a hit on the announcement date.
     result['match']['announced_date_first'] = (

From 3dbb9b789d44f97aef5bd700978abff0a1a2eade Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Tue, 18 Dec 2018 18:42:24 -0500
Subject: [PATCH 49/77] supporting cross-list classification in high-level
 filtering, per #209

---
 DECISIONS.md                                 |  5 +-
 search/controllers/advanced/__init__.py      | 10 ++--
 search/controllers/advanced/forms.py         |  8 +++
 search/controllers/advanced/tests.py         | 44 ++++++++---------
 search/controllers/api/__init__.py           |  2 +-
 search/controllers/simple/__init__.py        |  4 +-
 search/domain/advanced.py                    | 10 +++-
 search/domain/base.py                        |  7 ++-
 search/services/index/__init__.py            |  5 +-
 search/services/index/advanced.py            | 35 ++++----------
 search/services/index/prepare.py             | 51 ++++++++------------
 search/services/index/simple.py              |  8 ++-
 search/templates/search/advanced_search.html |  9 ++++
 13 files changed, 102 insertions(+), 96 deletions(-)

diff --git a/DECISIONS.md b/DECISIONS.md
index f8056ac8..ebb1045e 100644
--- a/DECISIONS.md
+++ b/DECISIONS.md
@@ -40,5 +40,6 @@
 - 2018-12-18. Removing cross-list functionality in v0.1 was a regression. Users
   expect to be able to search by cross-list category just like primary
   category. We decided to include cross-list/secondary category in the
-  all-fields search, and also add a cross-list option to the advanced search
-  interface.
+  all-fields search, add a cross-list field to the advanced search interface,
+  and include cross-list classification in shortcut routes and the advanced
+  interface's classification filter (with option to exclude).
diff --git a/search/controllers/advanced/__init__.py b/search/controllers/advanced/__init__.py
index 18f5a851..dbca1b23 100644
--- a/search/controllers/advanced/__init__.py
+++ b/search/controllers/advanced/__init__.py
@@ -178,6 +178,8 @@ def _query_from_form(form: forms.AdvancedSearchForm) -> AdvancedQuery:
     q = _update_query_with_dates(q, form.date.data)
     q = _update_query_with_terms(q, form.terms.data)
     q = _update_query_with_classification(q, form.classification.data)
+    q.include_cross_list = form.classification.include_cross_list.data \
+        == form.classification.INCLUDE_CROSS_LIST
     if form.include_older_versions.data:
         q.include_older_versions = True
     order = form.order.data
@@ -189,7 +191,7 @@ def _query_from_form(form: forms.AdvancedSearchForm) -> AdvancedQuery:
 
 def _update_query_with_classification(q: AdvancedQuery, data: MultiDict) \
         -> AdvancedQuery:
-    q.primary_classification = ClassificationList()
+    q.classification = ClassificationList()
     archives = [
         ('computer_science', 'cs'), ('economics', 'econ'), ('eess', 'eess'),
         ('mathematics', 'math'), ('q_biology', 'q-bio'),
@@ -199,16 +201,16 @@ def _update_query_with_classification(q: AdvancedQuery, data: MultiDict) \
         if data.get(field):
             # Fix for these typing issues is coming soon!
             #  See: https://github.com/python/mypy/pull/4397
-            q.primary_classification.append(
+            q.classification.append(
                 Classification(archive={'id': archive})  # type: ignore
             )
     if data.get('physics') and 'physics_archives' in data:
         if 'all' in data['physics_archives']:
-            q.primary_classification.append(
+            q.classification.append(
                 Classification(group={'id': 'grp_physics'})  # type: ignore
             )
         else:
-            q.primary_classification.append(
+            q.classification.append(
                 Classification(     # type: ignore
                     group={'id': 'grp_physics'},
                     archive={'id': data['physics_archives']}
diff --git a/search/controllers/advanced/forms.py b/search/controllers/advanced/forms.py
index 40b4eeea..a5035e7f 100644
--- a/search/controllers/advanced/forms.py
+++ b/search/controllers/advanced/forms.py
@@ -102,6 +102,9 @@ class ClassificationForm(Form):
          in taxonomy.ARCHIVES_ACTIVE.items()
          if description['in_group'] == 'grp_physics']
 
+    INCLUDE_CROSS_LIST = 'include'
+    EXCLUDE_CROSS_LIST = 'exclude'
+
     computer_science = BooleanField('Computer Science (cs)')
     economics = BooleanField('Economics (econ)')
     eess = BooleanField('Electrical Engineering and Systems Science (eess)')
@@ -112,6 +115,11 @@ class ClassificationForm(Form):
     q_finance = BooleanField('Quantitative Finance (q-fin)')
     statistics = BooleanField('Statistics (stat)')
 
+    include_cross_list = RadioField('Include cross-list', choices=[
+        (INCLUDE_CROSS_LIST, 'Include cross-listed papers'),
+        (EXCLUDE_CROSS_LIST, 'Exclude cross-listed papers')
+    ], default=INCLUDE_CROSS_LIST)
+
 
 def yearInBounds(form: Form, field: DateField) -> None:
     """May not be prior to 1991, or later than the current year."""
diff --git a/search/controllers/advanced/tests.py b/search/controllers/advanced/tests.py
index 93c347d9..283e0196 100644
--- a/search/controllers/advanced/tests.py
+++ b/search/controllers/advanced/tests.py
@@ -392,31 +392,31 @@ def test_classification_is_selected(self):
         class_data = {'computer_science': True}
         q = advanced._update_query_with_classification(Query(), class_data)
         self.assertIsInstance(q, Query)
-        self.assertIsInstance(q.primary_classification, list)
-        self.assertEqual(len(q.primary_classification), 1)
-        self.assertIsInstance(q.primary_classification[0], Classification)
-        self.assertEqual(q.primary_classification[0].archive['id'], 'cs')
+        self.assertIsInstance(q.classification, list)
+        self.assertEqual(len(q.classification), 1)
+        self.assertIsInstance(q.classification[0], Classification)
+        self.assertEqual(q.classification[0].archive['id'], 'cs')
 
     def test_multiple_classifications_are_selected(self):
         """Selected classifications are added to the query."""
         class_data = {'computer_science': True, 'eess': True}
         q = advanced._update_query_with_classification(Query(), class_data)
         self.assertIsInstance(q, Query)
-        self.assertIsInstance(q.primary_classification, list)
-        self.assertEqual(len(q.primary_classification), 2)
-        self.assertIsInstance(q.primary_classification[0], Classification)
-        self.assertIsInstance(q.primary_classification[1], Classification)
+        self.assertIsInstance(q.classification, list)
+        self.assertEqual(len(q.classification), 2)
+        self.assertIsInstance(q.classification[0], Classification)
+        self.assertIsInstance(q.classification[1], Classification)
 
     def test_physics_is_selected_all_archives(self):
         """The physics group is added to the query."""
         class_data = {'physics': True, 'physics_archives': 'all'}
         q = advanced._update_query_with_classification(Query(), class_data)
         self.assertIsInstance(q, Query)
-        self.assertIsInstance(q.primary_classification, list)
-        self.assertEqual(len(q.primary_classification), 1)
-        self.assertIsInstance(q.primary_classification[0], Classification)
-        self.assertIsNone(q.primary_classification[0].archive)
-        self.assertEqual(q.primary_classification[0].group['id'],
+        self.assertIsInstance(q.classification, list)
+        self.assertEqual(len(q.classification), 1)
+        self.assertIsInstance(q.classification[0], Classification)
+        self.assertIsNone(q.classification[0].archive)
+        self.assertEqual(q.classification[0].group['id'],
                          'grp_physics')
 
     def test_physics_is_selected_specific_archive(self):
@@ -424,11 +424,11 @@ def test_physics_is_selected_specific_archive(self):
         class_data = {'physics': True, 'physics_archives': 'hep-ex'}
         q = advanced._update_query_with_classification(Query(), class_data)
         self.assertIsInstance(q, Query)
-        self.assertIsInstance(q.primary_classification, list)
-        self.assertEqual(len(q.primary_classification), 1)
-        self.assertIsInstance(q.primary_classification[0], Classification)
-        self.assertEqual(q.primary_classification[0].archive['id'], 'hep-ex')
-        self.assertEqual(q.primary_classification[0].group['id'],
+        self.assertIsInstance(q.classification, list)
+        self.assertEqual(len(q.classification), 1)
+        self.assertIsInstance(q.classification[0], Classification)
+        self.assertEqual(q.classification[0].archive['id'], 'hep-ex')
+        self.assertEqual(q.classification[0].group['id'],
                          'grp_physics')
 
     def test_physics_is_selected_specific_archive_plus_other_groups(self):
@@ -440,10 +440,10 @@ def test_physics_is_selected_specific_archive_plus_other_groups(self):
         }
         q = advanced._update_query_with_classification(Query(), class_data)
         self.assertIsInstance(q, Query)
-        self.assertIsInstance(q.primary_classification, list)
-        self.assertEqual(len(q.primary_classification), 2)
-        self.assertIsInstance(q.primary_classification[0], Classification)
-        self.assertIsInstance(q.primary_classification[1], Classification)
+        self.assertIsInstance(q.classification, list)
+        self.assertEqual(len(q.classification), 2)
+        self.assertIsInstance(q.classification[0], Classification)
+        self.assertIsInstance(q.classification[1], Classification)
 
 
 class TestUpdateQueryWithFieldedTerms(TestCase):
diff --git a/search/controllers/api/__init__.py b/search/controllers/api/__init__.py
index 6275a505..9b6c5b47 100644
--- a/search/controllers/api/__init__.py
+++ b/search/controllers/api/__init__.py
@@ -54,7 +54,7 @@ def search(params: MultiDict) -> Tuple[Dict[str, Any], int, Dict[str, Any]]:
 
     classifications = _get_classifications(params)
     if classifications is not None:
-        q.primary_classification = classifications
+        q.classification = classifications
 
     include_fields = _get_include_fields(params)
     if include_fields:
diff --git a/search/controllers/simple/__init__.py b/search/controllers/simple/__init__.py
index 5443aee0..e871ada2 100644
--- a/search/controllers/simple/__init__.py
+++ b/search/controllers/simple/__init__.py
@@ -247,8 +247,8 @@ def _update_with_archives(q: SimpleQuery, archives: List[str]) -> SimpleQuery:
     :class:`SimpleQuery`
     """
     logger.debug('Search within %s', archives)
-    q.primary_classification = ClassificationList([
-        Classification(archive={'id': 'archive'})    # type: ignore
+    q.classification = ClassificationList([
+        Classification(archive={'id': archive})    # type: ignore
         for archive in archives
     ])
     return q
diff --git a/search/domain/advanced.py b/search/domain/advanced.py
index 2d2dda30..c15b4db5 100644
--- a/search/domain/advanced.py
+++ b/search/domain/advanced.py
@@ -34,7 +34,7 @@ class AdvancedQuery(Query):
 
     An advanced query contains fielded search terms and boolean operators.
     """
-    
+
     SUPPORTED_FIELDS = [
         ('title', 'Title'),
         ('author', 'Author(s)'),
@@ -53,7 +53,13 @@ class AdvancedQuery(Query):
     ]
 
     date_range: Optional[DateRange] = None
-    primary_classification: ClassificationList = field(
+
+    classification: ClassificationList = field(
         default_factory=ClassificationList
     )
+    """Classification(s) by which to limit results."""
+
+    include_cross_list: bool = field(default=True)
+    """If True, secondaries are considered when limiting by classification."""
+
     terms: FieldedSearchList = field(default_factory=FieldedSearchList)
diff --git a/search/domain/base.py b/search/domain/base.py
index 7149cc7c..6abd951c 100644
--- a/search/domain/base.py
+++ b/search/domain/base.py
@@ -231,9 +231,14 @@ class SimpleQuery(Query):
 
     search_field: str = field(default_factory=str)
     value: str = field(default_factory=str)
-    primary_classification: ClassificationList = field(
+
+    classification: ClassificationList = field(
         default_factory=ClassificationList
     )
+    """Classification(s) by which to limit results."""
+
+    include_cross_list: bool = field(default=True)
+    """If True, secondaries are considered when limiting by classification."""
 
 
 @dataclass(init=True)
diff --git a/search/services/index/__init__.py b/search/services/index/__init__.py
index a7d7e664..1216836c 100644
--- a/search/services/index/__init__.py
+++ b/search/services/index/__init__.py
@@ -399,8 +399,9 @@ def search(self, query: Query, highlight: bool = True) -> DocumentSet:
             elif isinstance(query, SimpleQuery):
                 current_search = simple_search(current_search, query)
         except TypeError as e:
-            logger.error('Malformed query: %s', str(e))
-            raise QueryError('Malformed query') from e
+            raise e
+            # logger.error('Malformed query: %s', str(e))
+            # raise QueryError('Malformed query') from e
 
         if highlight:
             # Highlighting is performed by Elasticsearch; here we include the
diff --git a/search/services/index/advanced.py b/search/services/index/advanced.py
index 8b744052..ebedda22 100644
--- a/search/services/index/advanced.py
+++ b/search/services/index/advanced.py
@@ -2,12 +2,15 @@
 
 from typing import Any, Union
 
+from functools import reduce, wraps
+from operator import ior, iand
+
 from elasticsearch_dsl import Search, Q, SF
 from elasticsearch_dsl.query import Range, Match, Bool
 
 from search.domain import AdvancedQuery, Classification, APIQuery
 
-from .prepare import SEARCH_FIELDS
+from .prepare import SEARCH_FIELDS, limit_by_classification
 from .util import sort
 
 
@@ -35,10 +38,14 @@ def advanced_search(search: Search,
     # behavior of faceted search.
     if not query.include_older_versions:
         search = search.filter("term", is_current=True)
+    _q_clsn = limit_by_classification(query.classification)
+    if query.include_cross_list:
+        _q_clsn |= limit_by_classification(query.classification,
+                                           "secondary_classification")
     q = (
         _fielded_terms_to_q(query)
         & _date_range(query)
-        & _classifications(query)
+        & _q_clsn
     )
     if query.order is None or query.order == 'relevance':
         # Boost the current version heavily when sorting by relevance.
@@ -52,30 +59,6 @@ def advanced_search(search: Search,
     return search
 
 
-def _classification(field: str, classification: Classification) -> Match:
-    """Get a query part for a :class:`.Classification`."""
-    query = Q()
-    if classification.group:
-        field_name = '%s__group__id' % field
-        query &= Q('match', **{field_name: classification.group['id']})
-    if classification.archive:
-        field_name = '%s__archive__id' % field
-        query &= Q('match', **{field_name: classification.archive['id']})
-    return query
-
-
-def _classifications(q: Union[AdvancedQuery, APIQuery]) -> Match:
-    """Get a query part for classifications on an :class:`.AdvancedQuery`."""
-    if not q.primary_classification:
-        return Q()
-    query = _classification('primary_classification',
-                            q.primary_classification[0])
-    if len(q.primary_classification) > 1:
-        for classification in q.primary_classification[1:]:
-            query |= _classification('primary_classification', classification)
-    return query
-
-
 def _date_range(q: Union[AdvancedQuery, APIQuery]) -> Range:
     """Generate a query part for a date range."""
     if not q.date_range:
diff --git a/search/services/index/prepare.py b/search/services/index/prepare.py
index 9535e319..eed44d26 100644
--- a/search/services/index/prepare.py
+++ b/search/services/index/prepare.py
@@ -306,7 +306,7 @@ def _query_all_fields(term: str) -> Q:
                 match_remainder = _query_combined(remainder)
                 match_all_fields |= (match_remainder & match_date)
 
-                match_individual_sans_date = reduce(ior, [
+                match_sans_date = reduce(ior, [
                     _query_paper_id(remainder, operator='AND'),
                     author_query(remainder, operator='AND'),
                     _query_title(remainder, default_operator='and'),
@@ -322,7 +322,7 @@ def _query_all_fields(term: str) -> Q:
                     _query_primary(remainder, operator='and'),
                     _query_secondary(remainder, operator='and')
                 ])
-                match_individual_field |= (match_individual_sans_date & match_date)
+                match_individual_field |= (match_sans_date & match_date)
             else:
                 match_all_fields |= match_date
 
@@ -334,37 +334,24 @@ def _query_all_fields(term: str) -> Q:
              boost_mode='multiply')
 
 
-def limit_by_classification(classifications: ClassificationList) -> Q:
+def limit_by_classification(classifications: ClassificationList,
+                            field: str = 'primary_classification') -> Q:
     """Generate a :class:`Q` to limit a query by by classification."""
-    def _to_q(classification: Classification) -> Q:
-        _qs = []
-        if classification.group:
-            _qs.append(
-                Q("match", **{
-                    "primary_classification__group__id": {
-                        "query": classification.group['id']
-                    }
-                })
-            )
-        if classification.archive:
-            _qs.append(
-                Q("match", **{
-                    "primary_classification__archive__id": {
-                        "query": classification.archive['id']
-                    }
-                })
-            )
-        if classification.category:
-            _qs.append(
-                Q("match", **{
-                    "primary_classification__category__id": {
-                        "query": classification.category['id']
-                    }
-                })
-            )
-        return reduce(iand, _qs)
-
-    return reduce(ior, [_to_q(clsn) for clsn in classifications])
+    if len(classifications) == 0:
+        return Q()
+
+    def _to_q(clsn: Classification) -> Q:
+        return reduce(iand, [
+            Q('match', **{f'{field}__{level}__id': getattr(clsn, level)['id']})
+            for level in ['group', 'archive', 'category']
+            if getattr(clsn, level) is not None
+        ])
+
+    _q = reduce(ior, map(_to_q, classifications))
+    if field == 'secondary_classification':
+        _q = Q("nested", path="secondary_classification", query=_q)
+
+    return _q
 
 
 SEARCH_FIELDS: Dict[str, Callable[[str], Q]] = dict([
diff --git a/search/services/index/simple.py b/search/services/index/simple.py
index 8ba01086..1406f0c1 100644
--- a/search/services/index/simple.py
+++ b/search/services/index/simple.py
@@ -28,8 +28,12 @@ def simple_search(search: Search, query: SimpleQuery) -> Search:
     """
     search = search.filter("term", is_current=True)
     q = SEARCH_FIELDS[query.search_field](query.value)
-    if query.primary_classification:
-        q &= limit_by_classification(query.primary_classification)
+    if query.classification:
+        _q = limit_by_classification(query.classification)
+        if query.include_cross_list:
+            _q |= limit_by_classification(query.classification,
+                                          "secondary_classification")
+        q &= _q
     search = search.query(q)
     search = sort(query, search)
     return search
diff --git a/search/templates/search/advanced_search.html b/search/templates/search/advanced_search.html
index 928a490e..d7668351 100644
--- a/search/templates/search/advanced_search.html
+++ b/search/templates/search/advanced_search.html
@@ -147,6 +147,15 @@
                 {{ select_field(form.classification.statistics) }}
               </div>
             </div>
+            <div class="field">
+              <div class="control">
+                {% for subfield in form.classification.include_cross_list %}
+                <label class="radio">
+                  {{ subfield }} {{ subfield.label.text }}
+                </label>
+                {% endfor %}
+              </div>
+            </div>
           </fieldset>
           <fieldset class="fieldset {% if form.date.errors %}is-warning{% endif %}">
             <legend class="legend">Date</legend>

From 0f9f41bad710407bf6ff57d950b7c56176455bdc Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Tue, 18 Dec 2018 18:51:12 -0500
Subject: [PATCH 50/77] updated some dependencies, refs

---
 Dockerfile                        | 2 +-
 Dockerfile-agent                  | 2 +-
 Dockerfile-api                    | 2 +-
 Dockerfile-index                  | 2 +-
 search/config.py                  | 4 ++--
 search/templates/search/base.html | 2 +-
 6 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index f9bbd0c9..8384dddf 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,7 +3,7 @@
 # Defines the runtime for the arXiv search service, which provides the main
 # UIs (and, eventually, APIs) for search.
 
-FROM arxiv/base:0.6.1
+FROM arxiv/base:0.12.1rc2
 
 WORKDIR /opt/arxiv
 
diff --git a/Dockerfile-agent b/Dockerfile-agent
index 12e8d369..dca3805d 100644
--- a/Dockerfile-agent
+++ b/Dockerfile-agent
@@ -4,7 +4,7 @@
 #  article metadata becomes available. Subscribes to a Kinesis stream for
 #  notifications about new metadata.
 
-FROM arxiv/search:0.4
+FROM arxiv/search:0.5
 
 WORKDIR /opt/arxiv
 
diff --git a/Dockerfile-api b/Dockerfile-api
index 48e49dd3..918b9f1f 100644
--- a/Dockerfile-api
+++ b/Dockerfile-api
@@ -3,7 +3,7 @@
 # Defines the runtime for the arXiv search API, which provides a metadata
 # query API backed by Elasticsearch.
 
-FROM arxiv/base:0.6.1
+FROM arxiv/base:0.12.1rc2
 
 WORKDIR /opt/arxiv
 
diff --git a/Dockerfile-index b/Dockerfile-index
index 834dd631..fbd048ba 100644
--- a/Dockerfile-index
+++ b/Dockerfile-index
@@ -16,7 +16,7 @@
 #
 # See also ELASTICSEARCH_* and METADATA_ENDPOINT parameters, below.
 
-FROM arxiv/base
+FROM arxiv/base:0.12.1rc2
 
 # Add Python consumer and configuration.
 ADD requirements/prod.txt /opt/arxiv/requirements.txt
diff --git a/search/config.py b/search/config.py
index ccab3147..99f17598 100644
--- a/search/config.py
+++ b/search/config.py
@@ -224,8 +224,8 @@
 FLASKS3_ACTIVE = os.environ.get('FLASKS3_ACTIVE', 0)
 
 # Settings for display of release information
-RELEASE_NOTES_URL = 'https://confluence.cornell.edu/x/8H5OFQ'
-RELEASE_NOTES_TEXT = 'Search v0.4 released 2018-07-18'
+RELEASE_NOTES_URL = 'https://confluence.cornell.edu/x/giazFQ'
+RELEASE_NOTES_TEXT = 'Search v0.5 released 2018-12-20'
 
 
 EXTERNAL_URL_SCHEME = os.environ.get('EXTERNAL_URL_SCHEME', 'https')
diff --git a/search/templates/search/base.html b/search/templates/search/base.html
index e7e421db..966ad7dc 100644
--- a/search/templates/search/base.html
+++ b/search/templates/search/base.html
@@ -28,7 +28,7 @@
     	},
       fieldValues: {
         "components": ["16000"],  // Search component.
-        "versions": ["14134"],  // Release search-0.4
+        "versions": ["14157"],  // Release search-0.5
         "customfield_11401": window.location.href
       }
     };

From 1732e2c7ac9f507606f6d12c23d2abb7eafc2e06 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Tue, 18 Dec 2018 18:52:55 -0500
Subject: [PATCH 51/77] fixed broken test

---
 search/services/index/tests/tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/search/services/index/tests/tests.py b/search/services/index/tests/tests.py
index 85bc1f45..a7d8f8e9 100644
--- a/search/services/index/tests/tests.py
+++ b/search/services/index/tests/tests.py
@@ -50,7 +50,7 @@ def test_advanced_query(self, mock_Elasticsearch, mock_Search):
                 start_date=datetime.now() - timedelta(days=5),
                 end_date=datetime.now()
             ),
-            primary_classification=ClassificationList([
+            classification=ClassificationList([
                 Classification(
                     group={'id': 'physics'},
                     archive={'id': 'physics'},

From be10271a91459d40649b5f2f0311de5c4d0a3970 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Tue, 18 Dec 2018 19:00:40 -0500
Subject: [PATCH 52/77] fixed mypy issues

---
 search/domain/api.py  |  3 ++-
 search/domain/base.py | 30 ++++++++++++++++++++++++------
 2 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/search/domain/api.py b/search/domain/api.py
index bbe41f51..a1bae351 100644
--- a/search/domain/api.py
+++ b/search/domain/api.py
@@ -26,9 +26,10 @@ class APIQuery(Query):
     """
 
     date_range: Optional[DateRange] = None
-    primary_classification: ClassificationList = field(
+    classification: ClassificationList = field(
         default_factory=ClassificationList
     )
+    include_cross_list: bool = field(default=True)
     terms: FieldedSearchList = field(default_factory=FieldedSearchList)
     include_fields: List[str] = field(default_factory=get_default_extra_fields)
 
diff --git a/search/domain/base.py b/search/domain/base.py
index 6abd951c..57a203fe 100644
--- a/search/domain/base.py
+++ b/search/domain/base.py
@@ -134,21 +134,39 @@ class Classification:
 
     @property
     def group_display(self) -> str:
+        """Get a human-friendly display label for the group."""
+        if self.group is None:
+            return ""
+        label: str
         if "name" in self.group:
-            return self.group["name"]
-        return taxonomy.get_group_display(self.group["id"])
+            label = self.group["name"]
+        else:
+            label = taxonomy.get_group_display(self.group["id"])
+        return label
 
     @property
     def archive_display(self) -> str:
+        """Get a human-friendly display label for the archive."""
+        if self.archive is None:
+            return ""
+        label: str
         if "name" in self.archive:
-            return self.archive["name"]
-        return taxonomy.get_archive_display(self.archive["id"])
+            label = self.archive["name"]
+        else:
+            label = taxonomy.get_archive_display(self.archive["id"])
+        return label
 
     @property
     def category_display(self) -> str:
+        """Get a human-friendly display label for the category."""
+        if self.category is None:
+            return ""
+        label: str
         if "name" in self.category:
-            return self.category["name"]
-        return taxonomy.get_category_display(self.category["id"])
+            label = self.category["name"]
+        else:
+            label = taxonomy.get_category_display(self.category["id"])
+        return label
 
     def __str__(self) -> str:
         """Build a string representation, for use in rendering."""

From e540a33c075c08044b5f9686f5c83197910ace4e Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Wed, 19 Dec 2018 12:42:38 -0500
Subject: [PATCH 53/77] minor update to dockerfile

---
 Dockerfile-agent | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile-agent b/Dockerfile-agent
index dca3805d..ab160de3 100644
--- a/Dockerfile-agent
+++ b/Dockerfile-agent
@@ -4,7 +4,7 @@
 #  article metadata becomes available. Subscribes to a Kinesis stream for
 #  notifications about new metadata.
 
-FROM arxiv/search:0.5
+FROM arxiv/search
 
 WORKDIR /opt/arxiv
 

From d2a1499bc7ce3ff9866b3e3c5583f700b8d130ff Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Wed, 19 Dec 2018 12:52:52 -0500
Subject: [PATCH 54/77] updated dockerfile

---
 Dockerfile | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index 8384dddf..28de131b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -7,6 +7,9 @@ FROM arxiv/base:0.12.1rc2
 
 WORKDIR /opt/arxiv
 
+# Install MySQL.
+RUN yum install -y which mysql mysql-devel
+
 # Add Python application and configuration.
 ADD app.py /opt/arxiv/
 ADD Pipfile /opt/arxiv/

From 4f722c16443c0555fc53b76b92e055e6e3c713bf Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Thu, 20 Dec 2018 07:07:13 -0500
Subject: [PATCH 55/77] ARXIVNG-1448 support for primary, ARXIVNG-1447
 secondary categories

---
 Dockerfile-agent                   |   2 +-
 schema/search.yaml                 |  52 +++++++++++-
 search/controllers/api/__init__.py |  55 ++++++++----
 search/domain/api.py               |  14 +--
 search/services/index/__init__.py  |   5 +-
 search/services/index/advanced.py  |  16 ++--
 search/services/index/api.py       | 132 +++++++++++++++++++++++++++++
 search/services/index/prepare.py   |  19 +++++
 8 files changed, 259 insertions(+), 36 deletions(-)
 create mode 100644 search/services/index/api.py

diff --git a/Dockerfile-agent b/Dockerfile-agent
index ab160de3..60051f78 100644
--- a/Dockerfile-agent
+++ b/Dockerfile-agent
@@ -4,7 +4,7 @@
 #  article metadata becomes available. Subscribes to a Kinesis stream for
 #  notifications about new metadata.
 
-FROM arxiv/search
+FROM arxiv/search:d2a1499bc7ce3ff9866b3e3c5583f700b8d130ff
 
 WORKDIR /opt/arxiv
 
diff --git a/schema/search.yaml b/schema/search.yaml
index e77367db..ae3ef7c9 100644
--- a/schema/search.yaml
+++ b/schema/search.yaml
@@ -166,12 +166,26 @@ paths:
           explode: true
           schema:
             type: string
-        - name: primary_category
+        - name: primary_classification
           in: query
           description: |
-            Slug for the primary category or categories to which results
-            should be limited. Valid categories are defined at
-            https://github.com/cul-it/arxiv-base/blob/master/arxiv/taxonomy/__init__.py#L306.
+            Limit query by primary classification. This field supports
+            filtering by group, archive, and category. Note that group names
+            are prefixed by ``grp_``, e.g. ``grp_physics``, ``grp_q-bio``. See
+            https://arxiv.github.io/arxiv-base/arxiv/arxiv.taxonomy.html for
+            more information.
+          examples:
+            groupMath:
+              summary: Include results from the math group.
+              value: grp_math
+            archivePhysics:
+              summary: Include results from the physics archive.
+              value: physics
+            categoryHE:
+              summary: |
+                Include results from the High Energy Astrophysical Phenomena
+                category.
+              value: astro-ph.HE
           required: false
           style: form
           explode: true
@@ -179,6 +193,36 @@ paths:
             type: array
             items:
               type: string
+              
+        - name: secondary_classification
+          in: query
+          description: |
+            Limit query by secondary (cross-list) classification. This field
+            supports filtering by group, archive, and category. Note that group
+            names are prefixed by ``grp_``, e.g. ``grp_physics``,
+            ``grp_q-bio``. See
+            https://arxiv.github.io/arxiv-base/arxiv/arxiv.taxonomy.html for
+            more information.
+          examples:
+            groupMath:
+              summary: Include results cross-listed in the math group.
+              value: grp_math
+            archivePhysics:
+              summary: Include results cross-listed in the physics archive.
+              value: physics
+            categoryHE:
+              summary: |
+                Include results cross-listed in the High Energy Astrophysical
+                Phenomena category.
+              value: astro-ph.HE
+          required: false
+          style: form
+          explode: true
+          schema:
+            type: array
+            items:
+              type: string
+
         - name: include
           in: query
           description: |
diff --git a/search/controllers/api/__init__.py b/search/controllers/api/__init__.py
index 9b6c5b47..c24eb867 100644
--- a/search/controllers/api/__init__.py
+++ b/search/controllers/api/__init__.py
@@ -52,9 +52,16 @@ def search(params: MultiDict) -> Tuple[Dict[str, Any], int, Dict[str, Any]]:
     if date_range is not None:
         q.date_range = date_range
 
-    classifications = _get_classifications(params)
-    if classifications is not None:
-        q.classification = classifications
+    primary_query = params.get('primary_classification')
+    if primary_query:
+        primary_classification = _get_classification(primary_query)
+        q.primary_classification = primary_classification
+
+    secondary_query = params.getlist('secondary_classification')
+    if secondary_query:
+        q.secondary_classification = list(
+            map(_get_classification, secondary_query)
+        )
 
     include_fields = _get_include_fields(params)
     if include_fields:
@@ -142,15 +149,33 @@ def _get_date_params(params: MultiDict) -> Optional[DateRange]:
     return None
 
 
-def _get_classifications(params: MultiDict) -> Optional[ClassificationList]:
-    classifications = ClassificationList()
-    for value in params.getlist('primary_classification'):
-        if value not in taxonomy.ARCHIVES:
-            raise BadRequest({
-                'field': 'primary_classification',
-                'reason': 'not a valid archive'
-            })
-        classifications.append(Classification(archive={'id': value}))   # type: ignore
-    if len(classifications) == 0:
-        return None
-    return classifications
+def _to_classification(value: List[str]) -> Tuple[Classification]:
+    clsns = []
+    if value in taxonomy.definitions.GROUPS:
+        klass = taxonomy.Group
+        field = 'group'
+    elif value in taxonomy.definitions.ARCHIVES:
+        klass = taxonomy.Archive
+        field = 'archive'
+    elif value in taxonomy.definitions.CATEGORIES:
+        klass = taxonomy.Category
+        field = 'category'
+    else:
+        raise ValueError('not a valid classification')
+    value = klass(value)
+    clsns.append(Classification(**{field: {'id': value}}))   # type: ignore
+    if value.unalias != value:
+        clsns.append(Classification(**{field: {'id': value.unalias()}}))   # type: ignore
+    if value.canonical != value:
+        clsns.append(Classification(**{field: {'id': value.canonical}}))   # type: ignore
+    return tuple(clsns)
+
+
+def _get_classification(value: str) -> Tuple[Classification]:
+    try:
+        return _to_classification(value)
+    except ValueError:
+        raise BadRequest({
+            'field': 'primary_classification',
+            'reason': 'not a valid classification term'
+        })
diff --git a/search/domain/api.py b/search/domain/api.py
index a1bae351..13cd9a4f 100644
--- a/search/domain/api.py
+++ b/search/domain/api.py
@@ -1,10 +1,10 @@
 """API-specific domain classes."""
 
-from .base import DateRange, Query, ClassificationList, List
+from .base import DateRange, Query, ClassificationList, Classification, List
 from .advanced import FieldedSearchList, FieldedSearchTerm
 
 from dataclasses import dataclass, field
-from typing import NamedTuple, Optional
+from typing import NamedTuple, Optional, Tuple
 
 
 def get_default_extra_fields() -> List[str]:
@@ -24,12 +24,14 @@ class APIQuery(Query):
 
     Similar to an advanced query.
     """
-
     date_range: Optional[DateRange] = None
-    classification: ClassificationList = field(
-        default_factory=ClassificationList
+    primary_classification: Tuple[Classification, ...] = \
+        field(default_factory=tuple)
+    """Limit results to a specific primary classification."""
+    secondary_classification: List[Tuple[Classification, ...]] = field(
+        default_factory=list
     )
-    include_cross_list: bool = field(default=True)
+    """Limit results by cross-list classification."""
     terms: FieldedSearchList = field(default_factory=FieldedSearchList)
     include_fields: List[str] = field(default_factory=get_default_extra_fields)
 
diff --git a/search/services/index/__init__.py b/search/services/index/__init__.py
index 1216836c..dba9db93 100644
--- a/search/services/index/__init__.py
+++ b/search/services/index/__init__.py
@@ -40,6 +40,7 @@
 from .util import MAX_RESULTS
 from .advanced import advanced_search
 from .simple import simple_search
+from .api import api_search
 from . import highlighting
 from . import results
 
@@ -394,10 +395,12 @@ def search(self, query: Query, highlight: bool = True) -> DocumentSet:
         logger.debug('got current search request %s', str(query))
         current_search = self._base_search()
         try:
-            if isinstance(query, AdvancedQuery) or isinstance(query, APIQuery):
+            if isinstance(query, AdvancedQuery):
                 current_search = advanced_search(current_search, query)
             elif isinstance(query, SimpleQuery):
                 current_search = simple_search(current_search, query)
+            elif isinstance(query, APIQuery):
+                current_search = api_search(current_search, query)
         except TypeError as e:
             raise e
             # logger.error('Malformed query: %s', str(e))
diff --git a/search/services/index/advanced.py b/search/services/index/advanced.py
index ebedda22..d33fa3d1 100644
--- a/search/services/index/advanced.py
+++ b/search/services/index/advanced.py
@@ -8,14 +8,13 @@
 from elasticsearch_dsl import Search, Q, SF
 from elasticsearch_dsl.query import Range, Match, Bool
 
-from search.domain import AdvancedQuery, Classification, APIQuery
+from search.domain import AdvancedQuery, Classification
 
 from .prepare import SEARCH_FIELDS, limit_by_classification
 from .util import sort
 
 
-def advanced_search(search: Search,
-                    query: Union[AdvancedQuery, APIQuery]) -> Search:
+def advanced_search(search: Search, query: AdvancedQuery) -> Search:
     """
     Prepare a :class:`.Search` from a :class:`.AdvancedQuery`.
 
@@ -23,9 +22,8 @@ def advanced_search(search: Search,
     ----------
     search : :class:`.Search`
         An Elasticsearch search in preparation.
-    query : :class:`.AdvancedQuery` or :class:`APIQuery`
-        An advanced query, originating from the advanced search controller or
-        the API, respectively.
+    query : :class:`.AdvancedQuery`
+        A query originating from the advanced search UI.
 
     Returns
     -------
@@ -59,7 +57,7 @@ def advanced_search(search: Search,
     return search
 
 
-def _date_range(q: Union[AdvancedQuery, APIQuery]) -> Range:
+def _date_range(q: AdvancedQuery) -> Range:
     """Generate a query part for a date range."""
     if not q.date_range:
         return Q()
@@ -106,7 +104,7 @@ def _get_operator(obj: Any) -> str:
     return obj.operator     # type: ignore
 
 
-def _group_terms(query: Union[AdvancedQuery, APIQuery]) -> tuple:
+def _group_terms(query: AdvancedQuery) -> tuple:
     """Group fielded search terms into a set of nested tuples."""
     terms = query.terms[:]
     for operator in ['NOT', 'AND', 'OR']:
@@ -121,7 +119,7 @@ def _group_terms(query: Union[AdvancedQuery, APIQuery]) -> tuple:
     return terms[0]     # type: ignore
 
 
-def _fielded_terms_to_q(query: Union[AdvancedQuery, APIQuery]) -> Match:
+def _fielded_terms_to_q(query: AdvancedQuery) -> Match:
     if len(query.terms) == 1:
         return SEARCH_FIELDS[query.terms[0].field](query.terms[0].term)
     elif len(query.terms) > 1:
diff --git a/search/services/index/api.py b/search/services/index/api.py
new file mode 100644
index 00000000..1d76c122
--- /dev/null
+++ b/search/services/index/api.py
@@ -0,0 +1,132 @@
+"""Supports the API search feature."""
+
+from typing import Any, Union
+
+from functools import reduce, wraps
+from operator import ior, iand
+
+from elasticsearch_dsl import Search, Q, SF
+from elasticsearch_dsl.query import Range, Match, Bool
+
+from search.domain import Classification, APIQuery
+
+from .prepare import SEARCH_FIELDS, query_primary_exact, query_secondary_exact
+from .util import sort
+
+
+def api_search(search: Search, query: APIQuery) -> Search:
+    """
+    Prepare a :class:`.Search` from a :class:`.APIQuery`.
+
+    Parameters
+    ----------
+    search : :class:`.Search`
+        An Elasticsearch search in preparation.
+    query : :class:`.APIQuery`
+        A query originating from the API.
+
+    Returns
+    -------
+    :class:`.Search`
+        The passed ES search object, updated with specific query parameters
+        that implement the API query.
+
+    """
+    # Classification and date are treated as filters; this foreshadows the
+    # behavior of faceted search.
+    if not query.include_older_versions:
+        search = search.filter("term", is_current=True)
+
+    _q_clsn = Q()
+    if query.primary_classification:
+        _q_clsn &= reduce(ior, map(query_primary_exact,
+                                   list(query.primary_classification)))
+    if query.secondary_classification:
+        for classification in query.secondary_classification:
+            _q_clsn &= reduce(ior, map(query_secondary_exact,
+                                       list(classification)))
+    q = (
+        _fielded_terms_to_q(query)
+        & _date_range(query)
+        & _q_clsn
+    )
+    if query.order is None or query.order == 'relevance':
+        # Boost the current version heavily when sorting by relevance.
+        q = Q('function_score', query=q, boost=5, boost_mode="multiply",
+              score_mode="max",
+              functions=[
+                SF({'weight': 5, 'filter': Q('term', is_current=True)})
+              ])
+    search = sort(query, search)
+    search = search.query(q)
+    return search
+
+
+def _date_range(q: APIQuery) -> Range:
+    """Generate a query part for a date range."""
+    if not q.date_range:
+        return Q()
+    params = {}
+    if q.date_range.date_type == q.date_range.ANNOUNCED:
+        fmt = '%Y-%m'
+    else:
+        fmt = '%Y-%m-%dT%H:%M:%S%z'
+    if q.date_range.start_date:
+        params["gte"] = q.date_range.start_date.strftime(fmt)
+    if q.date_range.end_date:
+        params["lt"] = q.date_range.end_date.strftime(fmt)
+    return Q('range', **{q.date_range.date_type: params})
+
+
+def _grouped_terms_to_q(term_pair: tuple) -> Q:
+    """Generate a :class:`.Q` from grouped terms."""
+    term_a_raw, operator, term_b_raw = term_pair
+
+    if type(term_a_raw) is tuple:
+        term_a = _grouped_terms_to_q(term_a_raw)
+    else:
+        term_a = SEARCH_FIELDS[term_a_raw.field](term_a_raw.term)
+
+    if type(term_b_raw) is tuple:
+        term_b = _grouped_terms_to_q(term_b_raw)
+    else:
+        term_b = SEARCH_FIELDS[term_b_raw.field](term_b_raw.term)
+
+    if operator == 'OR':
+        return term_a | term_b
+    elif operator == 'AND':
+        return term_a & term_b
+    elif operator == 'NOT':
+        return term_a & ~term_b
+    else:
+        # TODO: Confirm proper exception.
+        raise TypeError("Invalid operator for terms")
+
+
+def _get_operator(obj: Any) -> str:
+    if type(obj) is tuple:
+        return _get_operator(obj[0])
+    return obj.operator     # type: ignore
+
+
+def _group_terms(query: APIQuery) -> tuple:
+    """Group fielded search terms into a set of nested tuples."""
+    terms = query.terms[:]
+    for operator in ['NOT', 'AND', 'OR']:
+        i = 0
+        while i < len(terms) - 1:
+            if _get_operator(terms[i+1]) == operator:
+                terms[i] = (terms[i], operator, terms[i+1])
+                terms.pop(i+1)
+                i -= 1
+            i += 1
+    assert len(terms) == 1
+    return terms[0]     # type: ignore
+
+
+def _fielded_terms_to_q(query: APIQuery) -> Match:
+    if len(query.terms) == 1:
+        return SEARCH_FIELDS[query.terms[0].field](query.terms[0].term)
+    elif len(query.terms) > 1:
+        return _grouped_terms_to_q(_group_terms(query))
+    return Q('match_all')
diff --git a/search/services/index/prepare.py b/search/services/index/prepare.py
index eed44d26..c06220cf 100644
--- a/search/services/index/prepare.py
+++ b/search/services/index/prepare.py
@@ -123,6 +123,25 @@ def _query_primary(term: str, operator: str = 'and') -> Q:
     })
 
 
+def query_primary_exact(classification: Classification) -> Q:
+    return reduce(iand, [
+        Q("match", **{f"primary_classification__{field}__id":
+                      getattr(classification, field)['id']})
+        for field in ['group', 'archive', 'category']
+        if getattr(classification, field, None) is not None
+    ])
+
+
+def query_secondary_exact(classification: Classification) -> Q:
+    return Q("nested", path="secondary_classification",
+             query=reduce(iand, [
+                 Q("match", **{f"secondary_classification__{field}__id":
+                               getattr(classification, field)['id']})
+                 for field in ['group', 'archive', 'category']
+                 if getattr(classification, field, None) is not None
+             ]))
+
+
 def _query_secondary(term: str, operator: str = 'and') -> Q:
     return Q(
         "nested",

From 6969f6579cbbaeeb027d765763de30be25767d5e Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Thu, 20 Dec 2018 07:33:39 -0500
Subject: [PATCH 56/77] updated schema

---
 schema/search.yaml | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/schema/search.yaml b/schema/search.yaml
index ae3ef7c9..f4bef615 100644
--- a/schema/search.yaml
+++ b/schema/search.yaml
@@ -32,6 +32,7 @@ paths:
           explode: true
           schema:
             type: string
+
         - name: author
           in: query
           description: |
@@ -46,6 +47,7 @@ paths:
           explode: true
           schema:
             type: string
+
         - name: title
           in: query
           description: |
@@ -56,6 +58,7 @@ paths:
           explode: true
           schema:
             type: string
+
         - name: abstract
           in: query
           description: |
@@ -66,6 +69,7 @@ paths:
           explode: true
           schema:
             type: string
+
         - name: comments
           in: query
           description: |
@@ -77,6 +81,7 @@ paths:
           explode: true
           schema:
             type: string
+
         - name: journal_ref
           in: query
           description: |
@@ -88,6 +93,7 @@ paths:
           explode: true
           schema:
             type: string
+
         - name: report_num
           in: query
           description: |
@@ -99,6 +105,7 @@ paths:
           explode: true
           schema:
             type: string
+
         - name: acm_class
           in: query
           description: |
@@ -109,6 +116,7 @@ paths:
           explode: true
           schema:
             type: string
+
         - name: msc_class
           in: query
           description: |
@@ -119,6 +127,7 @@ paths:
           explode: true
           schema:
             type: string
+
         - name: doi
           in: query
           description: |
@@ -129,6 +138,7 @@ paths:
           explode: true
           schema:
             type: string
+
         - name: paper_id
           in: query
           description: |
@@ -140,6 +150,7 @@ paths:
           explode: true
           schema:
             type: string
+
         - name: orcid
           in: query
           description: |
@@ -153,6 +164,7 @@ paths:
           explode: true
           schema:
             type: string
+
         - name: author_id
           in: query
           description: |
@@ -166,6 +178,7 @@ paths:
           explode: true
           schema:
             type: string
+
         - name: primary_classification
           in: query
           description: |
@@ -176,24 +189,23 @@ paths:
             more information.
           examples:
             groupMath:
-              summary: Include results from the math group.
+              summary: Limit results to the math group.
               value: grp_math
             archivePhysics:
-              summary: Include results from the physics archive.
+              summary: Limit results to the physics archive.
               value: physics
             categoryHE:
               summary: |
-                Include results from the High Enegery Astrophysical Phenomena
+                Limit results to the High Energy Astrophysical Phenomena
                 category.
               value: astro-ph.HE
           required: false
           style: form
-          explode: true
           schema:
             type: array
             items:
               type: string
-              
+
         - name: secondary_classification
           in: query
           description: |
@@ -205,15 +217,16 @@ paths:
             more information.
           examples:
             groupMath:
-              summary: Include results cross-listed in the math group.
+              summary: Limit results to those cross-listed in the math group.
               value: grp_math
             archivePhysics:
-              summary: Include results cross-listed in the physics archive.
+              summary: |
+                Limit results to those cross-listed in the physics archive.
               value: physics
             categoryHE:
               summary: |
-                Include results cross-listed in the High Enegery Astrophysical
-                Phenomena category.
+                Limit results to those cross-listed in the High Energy
+                Astrophysical Phenomena category.
               value: astro-ph.HE
           required: false
           style: form
@@ -236,6 +249,7 @@ paths:
             type: array
             items:
               type: string
+
         - name: start_date
           in: query
           description: |
@@ -246,6 +260,7 @@ paths:
           schema:
             type: string
             format: date
+
         - name: end_date
           in: query
           description: |
@@ -256,6 +271,7 @@ paths:
           schema:
             type: string
             format: date
+
         - name: date_type
           in: query
           description: |

From 8aba213de2ee242e40b4b01d8afaf2b4cb2af6fd Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Thu, 20 Dec 2018 07:52:29 -0500
Subject: [PATCH 57/77] ARXIVNG-1223 query parameters are included in response
 metadata

---
 schema/resources/Classification.json     |  9 ++++
 schema/resources/ClassificationTerm.json | 13 ++++++
 schema/resources/Document.json           | 50 +--------------------
 schema/resources/DocumentMetadata.json   | 50 +--------------------
 schema/resources/DocumentSet.json        | 17 ++++++-
 search/controllers/api/__init__.py       | 56 ++++++++++++++++--------
 search/routes/api/serialize.py           |  1 +
 7 files changed, 80 insertions(+), 116 deletions(-)
 create mode 100644 schema/resources/Classification.json
 create mode 100644 schema/resources/ClassificationTerm.json

diff --git a/schema/resources/Classification.json b/schema/resources/Classification.json
new file mode 100644
index 00000000..7bb2e1cd
--- /dev/null
+++ b/schema/resources/Classification.json
@@ -0,0 +1,9 @@
+{
+  "title": "Classification",
+  "type": "object",
+  "properties": {
+      "archive": {"$ref": "./ClassificationTerm.json"},
+      "group": {"$ref": "./ClassificationTerm.json"},
+      "category": {"$ref": "./ClassificationTerm.json"}
+  }
+}
diff --git a/schema/resources/ClassificationTerm.json b/schema/resources/ClassificationTerm.json
new file mode 100644
index 00000000..5f0a686b
--- /dev/null
+++ b/schema/resources/ClassificationTerm.json
@@ -0,0 +1,13 @@
+{
+  "title": "ClassificationTerm",
+  "type": "object",
+  "properties": {
+    "id": {
+      "type": "string"
+    },
+    "name": {
+      "type": "string"
+    }
+  },
+  "required": ["id", "name"]
+}
diff --git a/schema/resources/Document.json b/schema/resources/Document.json
index c55ead5f..53330af2 100644
--- a/schema/resources/Document.json
+++ b/schema/resources/Document.json
@@ -111,11 +111,11 @@
         "uniqueItems": true
       },
       "primary_classification": {
-        "$ref": "./Document.json#/definitions/classification"
+        "$ref": "./Classification.json"
       },
       "secondary_classification": {
         "type": "array",
-        "items": {"$ref": "./Document.json#/definitions/classification"}
+        "items": {"$ref": "./Classification.json"}
       },
       "report_num": {
         "type": "string"
@@ -188,51 +188,5 @@
       "href",
       "canonical"
     ]
-  },
-  "definitions": {
-    "category": {
-      "type": "object",
-      "properties": {
-        "id": {
-          "type": "string"
-        },
-        "name": {
-          "type": "string"
-        }
-      },
-      "required": ["id", "name"]
-    },
-    "archive": {
-      "type": "object",
-      "properties": {
-        "id": {
-          "type": "string"
-        },
-        "name": {
-          "type": "string"
-        }
-      },
-      "required": ["id", "name"]
-    },
-    "group": {
-      "type": "object",
-      "properties": {
-        "id": {
-          "type": "string"
-        },
-        "name": {
-          "type": "string"
-        }
-      },
-      "required": ["id", "name"]
-    },
-    "classification": {
-      "type": "object",
-      "properties": {
-        "archive": {"$ref": "#/definitions/archive"},
-        "group": {"$ref": "#/definitions/group"},
-        "category": {"$ref": "#/definitions/category"}
-      }
-    }
   }
 }
diff --git a/schema/resources/DocumentMetadata.json b/schema/resources/DocumentMetadata.json
index 0f6fb3ab..7a957ae9 100644
--- a/schema/resources/DocumentMetadata.json
+++ b/schema/resources/DocumentMetadata.json
@@ -1,52 +1,6 @@
 {
     "title": "DocumentMetadata",
     "description": "Schema for arXiv document metadata provided by the docmeta endpoint.",
-    "definitions": {
-        "category": {
-            "type": "object",
-            "properties": {
-                "id": {
-                    "type": "string"
-                },
-                "name": {
-                    "type": "string"
-                }
-            },
-            "required": ["id", "name"]
-        },
-        "archive": {
-            "type": "object",
-            "properties": {
-                "id": {
-                    "type": "string"
-                },
-                "name": {
-                    "type": "string"
-                }
-            },
-            "required": ["id", "name"]
-        },
-        "group": {
-            "type": "object",
-            "properties": {
-                "id": {
-                    "type": "string"
-                },
-                "name": {
-                    "type": "string"
-                }
-            },
-            "required": ["id", "name"]
-        },
-        "classification": {
-            "type": "object",
-            "properties": {
-                "archive": {"$ref": "#/definitions/archive"},
-                "group": {"$ref": "#/definitions/group"},
-                "category": {"$ref": "#/definitions/category"}
-            }
-        }
-    },
     "type": "object",
     "properties": {
         "abs_categories": {
@@ -216,11 +170,11 @@
             "type": "string"
         },
         "primary_classification": {
-            "$ref": "#/definitions/classification"
+            "$ref": "./Classification.json"
         },
         "secondary_classification": {
             "type": "array",
-            "items": {"$ref": "#/definitions/classification"}
+            "items": {"$ref": "./Classification.json"}
         },
         "proxy": {
             "type": "string"
diff --git a/schema/resources/DocumentSet.json b/schema/resources/DocumentSet.json
index b664a7b1..27576192 100644
--- a/schema/resources/DocumentSet.json
+++ b/schema/resources/DocumentSet.json
@@ -18,6 +18,21 @@
         "total": {
           "description": "Total number of documents that respond to this query.",
           "type": "integer"
+        },
+        "query": {
+          "description": "Query parameters interpreted from the request.",
+          "type": "array",
+          "items": {
+            "type": "object",
+            "properties": {
+              "parameter": {
+                "type": "string"
+              },
+              "value": {
+                "type": "string"
+              }
+            }
+          }
         }
       }
     },
@@ -25,7 +40,7 @@
       "type": "array",
       "items": {
         "type": "object",
-        "$ref": "Document.json#Document"
+        "$ref": "Document.json"
       }
     }
   }
diff --git a/search/controllers/api/__init__.py b/search/controllers/api/__init__.py
index c24eb867..8222854f 100644
--- a/search/controllers/api/__init__.py
+++ b/search/controllers/api/__init__.py
@@ -45,30 +45,35 @@ def search(params: MultiDict) -> Tuple[Dict[str, Any], int, Dict[str, Any]]:
         Extra headers for the response.
     """
     q = APIQuery()
-    terms = _get_fielded_terms(params)
+    query_terms = []
+    terms = _get_fielded_terms(params, query_terms)
     if terms is not None:
         q.terms = terms
-    date_range = _get_date_params(params)
+    date_range = _get_date_params(params, query_terms)
     if date_range is not None:
         q.date_range = date_range
 
-    primary_query = params.get('primary_classification')
-    if primary_query:
-        primary_classification = _get_classification(primary_query)
+    primary = params.get('primary_classification')
+    if primary:
+        primary_classification = _get_classification(primary,
+                                                     'primary_classification',
+                                                     query_terms)
         q.primary_classification = primary_classification
 
-    secondary_query = params.getlist('secondary_classification')
-    if secondary_query:
-        q.secondary_classification = list(
-            map(_get_classification, secondary_query)
-        )
+    secondaries = params.getlist('secondary_classification')
+    if secondaries:
+        q.secondary_classification = [
+            _get_classification(sec, 'secondary_classification', query_terms)
+            for sec in secondaries
+        ]
 
-    include_fields = _get_include_fields(params)
+    include_fields = _get_include_fields(params, query_terms)
     if include_fields:
         q.include_fields += include_fields
 
     q = paginate(q, params)     # type: ignore
     document_set = index.search(q, highlight=False)
+    document_set.metadata['query'] = query_terms
     logger.debug('Got document set with %i results', len(document_set.results))
     return {'results': document_set, 'query': q}, status.HTTP_200_OK, {}
 
@@ -105,19 +110,24 @@ def paper(paper_id: str) -> Tuple[Dict[str, Any], int, Dict[str, Any]]:
     return {'results': document}, status.HTTP_200_OK, {}
 
 
-def _get_include_fields(params: MultiDict) -> List[str]:
+def _get_include_fields(params: MultiDict, query_terms: List) -> List[str]:
     include_fields = params.getlist('include')
     allowed_fields = Document.fields()
     if include_fields:
-        return [field for field in include_fields if field in allowed_fields]
+        inc = [field for field in include_fields if field in allowed_fields]
+        for field in inc:
+            query_terms.append({'parameter': 'include', 'value': field})
+        return inc
     return []
 
 
-def _get_fielded_terms(params: MultiDict) -> Optional[FieldedSearchList]:
+def _get_fielded_terms(params: MultiDict, query_terms: List) \
+        -> Optional[FieldedSearchList]:
     terms = FieldedSearchList()
     for field, _ in Query.SUPPORTED_FIELDS:
         values = params.getlist(field)
         for value in values:
+            query_terms.append({'parameter': field, 'value': value})
             terms.append(FieldedSearchTerm(     # type: ignore
                 operator='AND',
                 field=field,
@@ -128,7 +138,8 @@ def _get_fielded_terms(params: MultiDict) -> Optional[FieldedSearchList]:
     return terms
 
 
-def _get_date_params(params: MultiDict) -> Optional[DateRange]:
+def _get_date_params(params: MultiDict, query_terms: List) \
+        -> Optional[DateRange]:
     date_params = {}
     for field in ['start_date', 'end_date']:
         value = params.getlist(field)
@@ -142,14 +153,18 @@ def _get_date_params(params: MultiDict) -> Optional[DateRange]:
         except ValueError:
             raise BadRequest({'field': field, 'reason': 'invalid datetime'})
         date_params[field] = dt
+        query_terms.append({'parameter': field, 'value': dt})
     if 'date_type' in params:
         date_params['date_type'] = params.get('date_type')
+        query_terms.append({'parameter': 'date_type',
+                            'value': date_params['date_type']})
     if date_params:
         return DateRange(**date_params)  # type: ignore
     return None
 
 
-def _to_classification(value: List[str]) -> Tuple[Classification]:
+def _to_classification(value: List[str], query_terms: List) \
+        -> Tuple[Classification]:
     clsns = []
     if value in taxonomy.definitions.GROUPS:
         klass = taxonomy.Group
@@ -171,11 +186,14 @@ def _to_classification(value: List[str]) -> Tuple[Classification]:
     return tuple(clsns)
 
 
-def _get_classification(value: str) -> Tuple[Classification]:
+def _get_classification(value: str, field: str, query_terms: List) \
+        -> Tuple[Classification]:
     try:
-        return _to_classification(value)
+        clsns = _to_classification(value, query_terms)
     except ValueError:
         raise BadRequest({
-            'field': 'primary_classification',
+            'field': field,
             'reason': 'not a valid classification term'
         })
+    query_terms.append({'parameter': field, 'value': value})
+    return clsns
diff --git a/search/routes/api/serialize.py b/search/routes/api/serialize.py
index 369b1c94..f9b754f7 100644
--- a/search/routes/api/serialize.py
+++ b/search/routes/api/serialize.py
@@ -148,6 +148,7 @@ def serialize(cls, document_set: DocumentSet,
                 'end': document_set.metadata.get('end'),
                 'size': document_set.metadata.get('size'),
                 'total': document_set.metadata.get('total'),
+                'query': document_set.metadata.get('query')
             },
         })
         return serialized

From 1eca21aff53834aeb64f132b191bbb42fd00e57b Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Thu, 20 Dec 2018 07:57:27 -0500
Subject: [PATCH 58/77] fixed minor bug

---
 search/routes/api/serialize.py |  2 +-
 search/tests/test_api.py       | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100644 search/tests/test_api.py

diff --git a/search/routes/api/serialize.py b/search/routes/api/serialize.py
index f9b754f7..910d172a 100644
--- a/search/routes/api/serialize.py
+++ b/search/routes/api/serialize.py
@@ -148,7 +148,7 @@ def serialize(cls, document_set: DocumentSet,
                 'end': document_set.metadata.get('end'),
                 'size': document_set.metadata.get('size'),
                 'total': document_set.metadata.get('total'),
-                'query': document_set.metadata.get('query')
+                'query': document_set.metadata.get('query', [])
             },
         })
         return serialized
diff --git a/search/tests/test_api.py b/search/tests/test_api.py
new file mode 100644
index 00000000..31cd45a2
--- /dev/null
+++ b/search/tests/test_api.py
@@ -0,0 +1,13 @@
+from unittest import TestCase, mock
+
+from arxiv import taxonomy, status
+from search.factory import create_api_web_app
+
+
+class TestAdvancedSearch(TestCase):
+    """Tests for the search API."""
+
+    def setUp(self):
+        """Instantiate the API application."""
+        self.app = create_api_web_app()
+        self.client = self.app.test_client()

From beb35ef3bb8a6b8615300d3a424f6eefe0b733e9 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Thu, 20 Dec 2018 08:08:20 -0500
Subject: [PATCH 59/77] added examples to openapi schema

---
 schema/search.yaml       | 256 +++++++++++++++++++++++++++++++++++++++
 search/tests/test_api.py |   5 +
 2 files changed, 261 insertions(+)

diff --git a/schema/search.yaml b/schema/search.yaml
index f4bef615..8f6d3306 100644
--- a/schema/search.yaml
+++ b/schema/search.yaml
@@ -20,6 +20,102 @@ paths:
       description: |
         Returns all published arXiv papers that respond to the specified
         query parameters. By default, returns most recent papers first.
+
+        ## Example request
+
+        ```bash
+        curl \
+          -H "Authorization: Bearer 4mggHnvB3ZV1bV3GObE6wZFw8pul5nGyzfeABSdfDg" \
+          "https://api.arxiv.org/metadata/?size=5&license=http://arxiv.org/licenses/nonexclusive-distrib/1.0/&include=license"
+
+        ```
+
+        ## Example response
+
+        ```json
+        {
+          "metadata": {
+            "end": 5,
+            "query": [
+              {
+                "parameter": "license",
+                "value": "http://arxiv.org/licenses/nonexclusive-distrib/1.0/"
+              },
+              {
+                "parameter": "include",
+                "value": "license"
+              }
+            ],
+            "size": 5,
+            "start": 0,
+            "total": 993119
+          },
+          "results": [
+            {
+              "canonical": "https://arxiv.org/abs/1812.01565v1",
+              "href": "http://127.0.0.1:5000/1812.01565v1",
+              "license": {
+                "href": "http://arxiv.org/licenses/nonexclusive-distrib/1.0/",
+                "label": "arXiv.org perpetual, non-exclusive license to distribute this article"
+              },
+              "paper_id": "1812.01565",
+              "paper_id_v": "1812.01565v1",
+              "title": "Impact of radiation backgrounds on the formation of massive black holes",
+              "version": 1
+            },
+            {
+              "canonical": "https://arxiv.org/abs/1812.03980v1",
+              "href": "http://127.0.0.1:5000/1812.03980v1",
+              "license": {
+                "href": "http://arxiv.org/licenses/nonexclusive-distrib/1.0/",
+                "label": "arXiv.org perpetual, non-exclusive license to distribute this article"
+              },
+              "paper_id": "1812.03980",
+              "paper_id_v": "1812.03980v1",
+              "title": "Building Ethically Bounded AI",
+              "version": 1
+            },
+            {
+              "canonical": "https://arxiv.org/abs/1812.03942v1",
+              "href": "http://127.0.0.1:5000/1812.03942v1",
+              "license": {
+                "href": "http://arxiv.org/licenses/nonexclusive-distrib/1.0/",
+                "label": "arXiv.org perpetual, non-exclusive license to distribute this article"
+              },
+              "paper_id": "1812.03942",
+              "paper_id_v": "1812.03942v1",
+              "title": "Accurate Evaluation of $\\mathcal{P}$,$\\mathcal{T}$-odd Faraday Effect in Atoms of Xe and Hg",
+              "version": 1
+            },
+            {
+              "canonical": "https://arxiv.org/abs/1812.03969v1",
+              "href": "http://127.0.0.1:5000/1812.03969v1",
+              "license": {
+                "href": "http://arxiv.org/licenses/nonexclusive-distrib/1.0/",
+                "label": "arXiv.org perpetual, non-exclusive license to distribute this article"
+              },
+              "paper_id": "1812.03969",
+              "paper_id_v": "1812.03969v1",
+              "title": "Modified gravity, gravitational waves and the large-scale structure of the Universe: A brief report",
+              "version": 1
+            },
+            {
+              "canonical": "https://arxiv.org/abs/1812.03956v1",
+              "href": "http://127.0.0.1:5000/1812.03956v1",
+              "license": {
+                "href": "http://arxiv.org/licenses/nonexclusive-distrib/1.0/",
+                "label": "arXiv.org perpetual, non-exclusive license to distribute this article"
+              },
+              "paper_id": "1812.03956",
+              "paper_id_v": "1812.03956v1",
+              "title": "X-ray reflectivity with a twist: quantitative time-resolved X-ray reflectivity using monochromatic synchrotron radiation",
+              "version": 1
+            }
+          ]
+        }
+
+        ```
+
       parameters:
         - name: all
           in: query
@@ -47,6 +143,7 @@ paths:
           explode: true
           schema:
             type: string
+          example: sinskaja, e n
 
         - name: title
           in: query
@@ -58,6 +155,7 @@ paths:
           explode: true
           schema:
             type: string
+          example: "theory of life"
 
         - name: abstract
           in: query
@@ -69,6 +167,7 @@ paths:
           explode: true
           schema:
             type: string
+          example: abstr?ct
 
         - name: comments
           in: query
@@ -81,6 +180,7 @@ paths:
           explode: true
           schema:
             type: string
+          example: "color figures"
 
         - name: journal_ref
           in: query
@@ -93,6 +193,7 @@ paths:
           explode: true
           schema:
             type: string
+          example: "j cool beans"
 
         - name: report_num
           in: query
@@ -105,6 +206,7 @@ paths:
           explode: true
           schema:
             type: string
+          example: SU-4240-720
 
         - name: acm_class
           in: query
@@ -116,6 +218,7 @@ paths:
           explode: true
           schema:
             type: string
+          example: F.2.2
 
         - name: msc_class
           in: query
@@ -127,6 +230,7 @@ paths:
           explode: true
           schema:
             type: string
+          example: 14J60
 
         - name: doi
           in: query
@@ -138,6 +242,7 @@ paths:
           explode: true
           schema:
             type: string
+          example: 10.1016/S0550-3213(01)00405-9
 
         - name: paper_id
           in: query
@@ -150,6 +255,7 @@ paths:
           explode: true
           schema:
             type: string
+          example: 1601.00123
 
         - name: orcid
           in: query
@@ -164,6 +270,7 @@ paths:
           explode: true
           schema:
             type: string
+          example: 0000-0002-0564-9939
 
         - name: author_id
           in: query
@@ -178,6 +285,7 @@ paths:
           explode: true
           schema:
             type: string
+          example: warner_s_1
 
         - name: primary_classification
           in: query
@@ -249,6 +357,7 @@ paths:
             type: array
             items:
               type: string
+          example: title
 
         - name: start_date
           in: query
@@ -260,6 +369,7 @@ paths:
           schema:
             type: string
             format: date
+          example: "1998-04-03"
 
         - name: end_date
           in: query
@@ -271,6 +381,7 @@ paths:
           schema:
             type: string
             format: date
+          example: "1998-04-09"
 
         - name: date_type
           in: query
@@ -287,6 +398,7 @@ paths:
               - submitted_date_first
               - submitted_date
               - announced_date_first
+          example: submitted_date_first
 
       responses:
         '200':
@@ -307,6 +419,150 @@ paths:
         Get metadata about an arXiv paper by arXiv ID. See
         https://arxiv.org/help/arxiv_identifier for information about arXiv
         paper identifiers.
+
+        ## Example request
+
+        ```bash
+        curl \
+          -H "Authorization: Bearer 4mggHnvB3ZV1bV3GOaE8wZFw8pul5nGyzfeABSdfDg" \
+          https://api.arxiv.org/metadata/1811.00536v1
+        ```
+
+        ## Example response
+        ```json
+        {
+          "abs_categories": "cond-mat.str-el hep-th math-ph math.MP",
+          "abstract": "We give a systematic construction and classification of fermionic symmetry-protected topological states for generic fermionic symmetry group $G_f=\\mathbb Z_2^f\\rtimes G_b$, which is a central extension of bosonic symmetry group $G_b$ (may contain time reversal symmetry) by the fermion parity symmetry group $\\mathbb Z_2^f=\\{1,P_f\\}$. For each class in the classification (except those with 2D $p+ip$ chiral superconductor decorations), we construct a fixed-point wave function which admits exactly solvable commuting-projector Hamiltonian. The classification is based on the notion of equivalence class of fermionic symmetric local unitary transformations.",
+          "acm_class": [],
+          "announced_date_first": "2018-11",
+          "authors": [
+            {
+              "affiliation": [],
+              "author_id": null,
+              "first_name": "Qing-Rui",
+              "full_name": "Qing-Rui Wang",
+              "last_name": "Wang",
+              "orcid": null,
+              "suffix": ""
+            },
+            {
+              "affiliation": [],
+              "author_id": null,
+              "first_name": "Zheng-Cheng",
+              "full_name": "Zheng-Cheng Gu",
+              "last_name": "Gu",
+              "orcid": null,
+              "suffix": ""
+            }
+          ],
+          "authors_freeform": "Qing-Rui Wang and Zheng-Cheng Gu",
+          "canonical": "https://arxiv.org/abs/1811.00536v1",
+          "comments": "68 pages, 16 figures",
+          "doi": [],
+          "formats": [
+            {
+              "format": "pdf",
+              "href": "https://arxiv.org/pdf/1811.00536v1"
+            },
+            {
+              "format": "other",
+              "href": "https://arxiv.org/format/1811.00536v1"
+            }
+          ],
+          "href": "https://api.arxiv.org/metadata/1811.00536v1",
+          "is_current": true,
+          "is_withdrawn": false,
+          "journal_ref": "",
+          "latest": {
+            "canonical": "https://arxiv.org/abs/1811.00536v1",
+            "href": "https://api.arxiv.org/metadata/1811.00536v1",
+            "paper_id": "1811.00536v1",
+            "version": 1
+          },
+          "license": {
+            "href": "http://arxiv.org/licenses/nonexclusive-distrib/1.0/",
+            "label": "arXiv.org perpetual, non-exclusive license to distribute this article"
+          },
+          "msc_class": [],
+          "owners": [
+            {
+              "affiliation": [],
+              "author_id": null,
+              "first_name": "Qing-Rui",
+              "full_name": "Qing-Rui Wang",
+              "last_name": "Wang",
+              "orcid": null,
+              "suffix": ""
+            }
+          ],
+          "paper_id": "1811.00536",
+          "paper_id_v": "1811.00536v1",
+          "primary_classification": {
+            "archive": {
+              "id": "cond-mat",
+              "name": "Condensed Matter"
+            },
+            "category": {
+              "id": "cond-mat.str-el",
+              "name": "Strongly Correlated Electrons"
+            },
+            "group": {
+              "id": "grp_physics",
+              "name": "Physics"
+            }
+          },
+          "report_num": "",
+          "secondary_classification": [
+            {
+              "archive": {
+                "id": "hep-th",
+                "name": "High Energy Physics - Theory"
+              },
+              "category": {
+                "id": "hep-th",
+                "name": "High Energy Physics - Theory"
+              },
+              "group": {
+                "id": "grp_physics",
+                "name": "Physics"
+              }
+            },
+            {
+              "archive": {
+                "id": "math-ph",
+                "name": "Mathematical Physics"
+              },
+              "category": {
+                "id": "math-ph",
+                "name": "Mathematical Physics"
+              },
+              "group": {
+                "id": "grp_physics",
+                "name": "Physics"
+              }
+            }
+          ],
+          "source": {
+            "flags": "",
+            "format": "pdftex",
+            "size_bytes": 2478991
+          },
+          "submitted_date": "2018-11-01T13:57:22-04:00",
+          "submitted_date_first": "2018-11-01T13:57:22-04:00",
+          "submitter": {
+            "affiliation": [],
+            "author_id": null,
+            "first_name": "",
+            "full_name": "Qing-Rui Wang",
+            "last_name": "",
+            "orcid": null,
+            "suffix": ""
+          },
+          "title": "General group super-cohomology theory of fermionic symmetry-protected topological phases",
+          "version": 1
+        }
+        ```
+
       operationId: getPaperByID
       parameters:
         - name: id
diff --git a/search/tests/test_api.py b/search/tests/test_api.py
index 31cd45a2..05cdbacf 100644
--- a/search/tests/test_api.py
+++ b/search/tests/test_api.py
@@ -1,6 +1,8 @@
 from unittest import TestCase, mock
 
 from arxiv import taxonomy, status
+from arxiv.users.auth import scopes
+from arxiv.users.helpers import generate_token
 from search.factory import create_api_web_app
 
 
@@ -11,3 +13,6 @@ def setUp(self):
         """Instantiate the API application."""
         self.app = create_api_web_app()
         self.client = self.app.test_client()
+
+        self.token = generate_token('1', 'u@ser.com', 'theuser',
+                                    scope=[scopes.READ_PUBLIC])

From dc9d602186e10ee04df60d3005169435d4ceb54d Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Thu, 20 Dec 2018 08:14:40 -0500
Subject: [PATCH 60/77] fixed style and typing issues

---
 search/controllers/api/__init__.py | 18 +++++++++---------
 search/domain/api.py               |  1 +
 search/services/index/prepare.py   |  2 ++
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/search/controllers/api/__init__.py b/search/controllers/api/__init__.py
index 8222854f..fcaf23e9 100644
--- a/search/controllers/api/__init__.py
+++ b/search/controllers/api/__init__.py
@@ -45,7 +45,7 @@ def search(params: MultiDict) -> Tuple[Dict[str, Any], int, Dict[str, Any]]:
         Extra headers for the response.
     """
     q = APIQuery()
-    query_terms = []
+    query_terms: List[Dict[str, Any]] = []
     terms = _get_fielded_terms(params, query_terms)
     if terms is not None:
         q.terms = terms
@@ -163,8 +163,8 @@ def _get_date_params(params: MultiDict, query_terms: List) \
     return None
 
 
-def _to_classification(value: List[str], query_terms: List) \
-        -> Tuple[Classification]:
+def _to_classification(value: str, query_terms: List) \
+        -> Tuple[Classification, ...]:
     clsns = []
     if value in taxonomy.definitions.GROUPS:
         klass = taxonomy.Group
@@ -177,17 +177,17 @@ def _to_classification(value: List[str], query_terms: List) \
         field = 'category'
     else:
         raise ValueError('not a valid classification')
-    value = klass(value)
+    cast_value = klass(value)
     clsns.append(Classification(**{field: {'id': value}}))   # type: ignore
-    if value.unalias != value:
-        clsns.append(Classification(**{field: {'id': value.unalias()}}))   # type: ignore
-    if value.canonical != value:
-        clsns.append(Classification(**{field: {'id': value.canonical}}))   # type: ignore
+    if cast_value.unalias != value:
+        clsns.append(Classification(**{field: {'id': cast_value.unalias()}}))   # type: ignore
+    if cast_value.canonical != value:
+        clsns.append(Classification(**{field: {'id': cast_value.canonical}}))   # type: ignore
     return tuple(clsns)
 
 
 def _get_classification(value: str, field: str, query_terms: List) \
-        -> Tuple[Classification]:
+        -> Tuple[Classification, ...]:
     try:
         clsns = _to_classification(value, query_terms)
     except ValueError:
diff --git a/search/domain/api.py b/search/domain/api.py
index 13cd9a4f..6e29b3fc 100644
--- a/search/domain/api.py
+++ b/search/domain/api.py
@@ -24,6 +24,7 @@ class APIQuery(Query):
 
     Similar to an advanced query.
     """
+    
     date_range: Optional[DateRange] = None
     primary_classification: Tuple[Classification, ...] = \
         field(default_factory=tuple)
diff --git a/search/services/index/prepare.py b/search/services/index/prepare.py
index c06220cf..5ae48294 100644
--- a/search/services/index/prepare.py
+++ b/search/services/index/prepare.py
@@ -124,6 +124,7 @@ def _query_primary(term: str, operator: str = 'and') -> Q:
 
 
 def query_primary_exact(classification: Classification) -> Q:
+    """Generate a :class:`Q` for primary classification by ID."""
     return reduce(iand, [
         Q("match", **{f"primary_classification__{field}__id":
                       getattr(classification, field)['id']})
@@ -133,6 +134,7 @@ def query_primary_exact(classification: Classification) -> Q:
 
 
 def query_secondary_exact(classification: Classification) -> Q:
+    """Generate a :class:`Q` for secondary classification by ID."""
     return Q("nested", path="secondary_classification",
              query=reduce(iand, [
                  Q("match", **{f"secondary_classification__{field}__id":

From 4e55d39b3b09cc9513de7c194f8350e90b4d41ab Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Thu, 20 Dec 2018 08:45:14 -0500
Subject: [PATCH 61/77] added some tests

---
 search/controllers/api/__init__.py |   5 +-
 search/controllers/api/tests.py    | 144 +++++++++++++++++++++++++++++
 search/domain/api.py               |   1 -
 search/tests/test_api.py           |  18 ----
 4 files changed, 147 insertions(+), 21 deletions(-)
 create mode 100644 search/controllers/api/tests.py
 delete mode 100644 search/tests/test_api.py

diff --git a/search/controllers/api/__init__.py b/search/controllers/api/__init__.py
index fcaf23e9..d689f4e7 100644
--- a/search/controllers/api/__init__.py
+++ b/search/controllers/api/__init__.py
@@ -179,9 +179,10 @@ def _to_classification(value: str, query_terms: List) \
         raise ValueError('not a valid classification')
     cast_value = klass(value)
     clsns.append(Classification(**{field: {'id': value}}))   # type: ignore
-    if cast_value.unalias != value:
+    if cast_value.unalias() != cast_value:
         clsns.append(Classification(**{field: {'id': cast_value.unalias()}}))   # type: ignore
-    if cast_value.canonical != value:
+    if cast_value.canonical != cast_value \
+            and cast_value.canonical != cast_value.unalias():
         clsns.append(Classification(**{field: {'id': cast_value.canonical}}))   # type: ignore
     return tuple(clsns)
 
diff --git a/search/controllers/api/tests.py b/search/controllers/api/tests.py
new file mode 100644
index 00000000..6b9cb24b
--- /dev/null
+++ b/search/controllers/api/tests.py
@@ -0,0 +1,144 @@
+"""Tests for API search controller, :mod:`search.controllers.api`."""
+
+from unittest import TestCase, mock
+from datetime import date, datetime
+from dateutil.relativedelta import relativedelta
+from werkzeug import MultiDict
+from werkzeug.exceptions import InternalServerError, BadRequest
+
+from arxiv import status
+
+from search.domain import Query, DateRange, FieldedSearchTerm, Classification,\
+    AdvancedQuery, DocumentSet
+from search.controllers import api
+from search.domain import api as api_domain
+from search.services.index import IndexConnectionError, QueryError
+
+
+class TestAPISearch(TestCase):
+    """Tests for :func:`.api.search`."""
+
+    @mock.patch(f'{api.__name__}.index')
+    def test_no_params(self, mock_index):
+        """Request with no parameters."""
+        params = MultiDict({})
+        data, code, headers = api.search(params)
+
+        self.assertEqual(code, status.HTTP_200_OK, "Returns 200 OK")
+        self.assertIn("results", data, "Results are returned")
+        self.assertIn("query", data, "Query object is returned")
+        expected_fields = api_domain.get_required_fields() \
+            + api_domain.get_default_extra_fields()
+        self.assertEqual(set(data["query"].include_fields),
+                         set(expected_fields),
+                         "Default set of fields is included")
+
+    @mock.patch(f'{api.__name__}.index')
+    def test_include_fields(self, mock_index):
+        """Request with specific fields included."""
+        extra_fields = ['title', 'abstract', 'authors']
+        params = MultiDict({'include': extra_fields})
+        data, code, headers = api.search(params)
+
+        self.assertEqual(code, status.HTTP_200_OK, "Returns 200 OK")
+        self.assertIn("results", data, "Results are returned")
+        self.assertIn("query", data, "Query object is returned")
+        expected_fields = api_domain.get_required_fields() + extra_fields
+        self.assertEqual(set(data["query"].include_fields),
+                         set(expected_fields),
+                         "Requested fields are included")
+
+    @mock.patch(f'{api.__name__}.index')
+    def test_group_primary_classification(self, mock_index):
+        """Request with a group as primary classification."""
+        group = 'grp_physics'
+        params = MultiDict({'primary_classification': group})
+        data, code, headers = api.search(params)
+
+        self.assertEqual(code, status.HTTP_200_OK, "Returns 200 OK")
+        query = mock_index.search.call_args[0][0]
+        self.assertEqual(len(query.primary_classification), 1)
+        self.assertEqual(query.primary_classification[0],
+                         Classification(group={'id': group}))
+
+    @mock.patch(f'{api.__name__}.index')
+    def test_archive_primary_classification(self, mock_index):
+        """Request with an archive as primary classification."""
+        archive = 'physics'
+        params = MultiDict({'primary_classification': archive})
+        data, code, headers = api.search(params)
+
+        self.assertEqual(code, status.HTTP_200_OK, "Returns 200 OK")
+        query = mock_index.search.call_args[0][0]
+        self.assertEqual(len(query.primary_classification), 1)
+        self.assertEqual(query.primary_classification[0],
+                         Classification(archive={'id': archive}))
+
+    @mock.patch(f'{api.__name__}.index')
+    def test_archive_subsumed_classification(self, mock_index):
+        """Request with a subsumed archive as primary classification."""
+        archive = 'chao-dyn'
+        params = MultiDict({'primary_classification': archive})
+        data, code, headers = api.search(params)
+
+        self.assertEqual(code, status.HTTP_200_OK, "Returns 200 OK")
+        query = mock_index.search.call_args[0][0]
+        self.assertEqual(len(query.primary_classification), 2)
+        self.assertEqual(query.primary_classification[0],
+                         Classification(archive={'id': archive}))
+        self.assertEqual(query.primary_classification[1],
+                         Classification(archive={'id': 'nlin.CD'}),
+                         "The canonical archive is used instead")
+
+    @mock.patch(f'{api.__name__}.index')
+    def test_category_primary_classification(self, mock_index):
+        """Request with a category as primary classification."""
+        category = 'cs.DL'
+        params = MultiDict({'primary_classification': category})
+        data, code, headers = api.search(params)
+
+        self.assertEqual(code, status.HTTP_200_OK, "Returns 200 OK")
+        query = mock_index.search.call_args[0][0]
+        self.assertEqual(len(query.primary_classification), 1)
+        self.assertEqual(query.primary_classification[0],
+                         Classification(category={'id': category}))
+
+    @mock.patch(f'{api.__name__}.index')
+    def test_bad_classification(self, mock_index):
+        """Request with nonsense as primary classification."""
+        params = MultiDict({'primary_classification': 'nonsense'})
+        with self.assertRaises(BadRequest):
+            api.search(params)
+
+    @mock.patch(f'{api.__name__}.index')
+    def test_with_start_date(self, mock_index):
+        """Request with dates specified."""
+        params = MultiDict({'start_date': '1999-01-02'})
+        data, code, headers = api.search(params)
+
+        self.assertEqual(code, status.HTTP_200_OK, "Returns 200 OK")
+        query = mock_index.search.call_args[0][0]
+        self.assertIsNotNone(query.date_range)
+        self.assertEqual(query.date_range.start_date.year, 1999)
+        self.assertEqual(query.date_range.start_date.month, 1)
+        self.assertEqual(query.date_range.start_date.day, 2)
+        self.assertEqual(query.date_range.date_type,
+                         DateRange.SUBMITTED_CURRENT,
+                         "Submitted date of current version is the default")
+
+    @mock.patch(f'{api.__name__}.index')
+    def test_with_end_dates_and_type(self, mock_index):
+        """Request with end date and date type specified."""
+        params = MultiDict({'end_date': '1999-01-02',
+                            'date_type': 'announced_date_first'})
+        data, code, headers = api.search(params)
+
+        self.assertEqual(code, status.HTTP_200_OK, "Returns 200 OK")
+        query = mock_index.search.call_args[0][0]
+        self.assertIsNotNone(query.date_range)
+        self.assertEqual(query.date_range.end_date.year, 1999)
+        self.assertEqual(query.date_range.end_date.month, 1)
+        self.assertEqual(query.date_range.end_date.day, 2)
+
+        self.assertEqual(query.date_range.date_type,
+                         DateRange.ANNOUNCED)
diff --git a/search/domain/api.py b/search/domain/api.py
index 6e29b3fc..13cd9a4f 100644
--- a/search/domain/api.py
+++ b/search/domain/api.py
@@ -24,7 +24,6 @@ class APIQuery(Query):
 
     Similar to an advanced query.
     """
-    
     date_range: Optional[DateRange] = None
     primary_classification: Tuple[Classification, ...] = \
         field(default_factory=tuple)
diff --git a/search/tests/test_api.py b/search/tests/test_api.py
deleted file mode 100644
index 05cdbacf..00000000
--- a/search/tests/test_api.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from unittest import TestCase, mock
-
-from arxiv import taxonomy, status
-from arxiv.users.auth import scopes
-from arxiv.users.helpers import generate_token
-from search.factory import create_api_web_app
-
-
-class TestAdvancedSearch(TestCase):
-    """Tests for the search API."""
-
-    def setUp(self):
-        """Instantiate the API application."""
-        self.app = create_api_web_app()
-        self.client = self.app.test_client()
-
-        self.token = generate_token('1', 'u@ser.com', 'theuser',
-                                    scope=[scopes.READ_PUBLIC])

From e82633bd233698ea903de954c7d2266dd2f6a412 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Thu, 20 Dec 2018 08:57:00 -0500
Subject: [PATCH 62/77] fixed docstyle issue

---
 search/domain/api.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/search/domain/api.py b/search/domain/api.py
index 13cd9a4f..1d7604c9 100644
--- a/search/domain/api.py
+++ b/search/domain/api.py
@@ -24,6 +24,7 @@ class APIQuery(Query):
 
     Similar to an advanced query.
     """
+
     date_range: Optional[DateRange] = None
     primary_classification: Tuple[Classification, ...] = \
         field(default_factory=tuple)

From c04fc43783e383db9fb7bcb81a6b3aee9eab9570 Mon Sep 17 00:00:00 2001
From: eaw82 <eaw82@cornell.edu>
Date: Thu, 20 Dec 2018 10:19:32 -0500
Subject: [PATCH 63/77] ARXIVNG-1349 mobile styling fix for results

---
 search/static/css/search.css     | 7 ++++++-
 search/static/css/search.css.map | 2 +-
 search/static/sass/search.sass   | 5 +++++
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/search/static/css/search.css b/search/static/css/search.css
index 4f1fba4f..6095ded5 100644
--- a/search/static/css/search.css
+++ b/search/static/css/search.css
@@ -2,12 +2,17 @@
   margin-bottom: 1.25em; }
   .content .arxiv-result p {
     margin-bottom: 0.25em; }
+  .content .arxiv-result .title {
+    line-height: 1.25em; }
   .content .arxiv-result .tags {
     margin-bottom: 0; }
+    .content .arxiv-result .tags:not(:last-child) {
+      margin-bottom: 0; }
   .content .arxiv-result span.tag {
     height: 1.25rem; }
   .content .arxiv-result .list-title {
-    font-size: 1.05em; }
+    font-size: 1.05em;
+    line-height: 1.25em; }
 
 .search-hit {
   color: #287916 !important;
diff --git a/search/static/css/search.css.map b/search/static/css/search.css.map
index b74e7dcf..283b7c5f 100644
--- a/search/static/css/search.css.map
+++ b/search/static/css/search.css.map
@@ -1,6 +1,6 @@
 {
 "version": 3,
-"mappings": "AACE,sBAAa;EACX,aAAa,EAAE,MAAM;EACrB,wBAAC;IACC,aAAa,EAAE,MAAK;EACtB,4BAAK;IACH,aAAa,EAAE,CAAC;EAClB,+BAAQ;IACN,MAAM,EAAE,OAAO;EACjB,kCAAW;IACT,SAAS,EAAE,MAAM;;AAEvB,WAAW;EACT,KAAK,EAAE,kBAAkB;EACzB,gBAAgB,EAAE,kBAAkB;EACpC,WAAW,EAAE,IAAI;;;AAInB,aAAa;EACX,WAAW,EAAE,SAAS;;AAExB,WAAW;EACT,UAAU,EAAE,IAAI;EAChB,aAAa,EAAE,IAAI;;AAErB,gBAAgB;EACd,MAAM,EAAE,YAAY;;AAEtB,iBAAiB;EACf,UAAU,EAAE,IAAI;EAChB,aAAa,EAAE,IAAI;;AAGnB,oCAAoC;EADtC,mBAAmB;IAEf,WAAW,EAAE,eAAe;IAC5B,YAAY,EAAE,eAAe;AAC/B,oCAAoC;EAJtC,mBAAmB;IAKf,WAAW,EAAE,gBAAgB;IAC7B,YAAY,EAAE,gBAAgB;;AAElC,mBAAmB;EACjB,UAAU,EAAE,IAAI;;AAElB,wBAA+D;EAC7D,MAAM,EAAE,CAAC;;AAEX,SAAS;EACP,MAAM,EAAE,iBAAkC;EAC1C,aAAa,EAAE,GAAG;EAClB,OAAO,EAAE,iBAAgB;EACzB,MAAM,EAAE,KAAK;;AAEf,oBAAoB;EAClB,MAAM,EAAE,iBAAiB;;AAE3B,OAAO;EACL,WAAW,EAAE,GAAG;EAChB,gBAAgB,EAAE,OAAO;EACzB,OAAO,EAAE,OAAM;EACf,WAAW,EAAE,MAAK;;AAEpB,YAAY;EACV,WAAW,EAAE,QAAQ;;AAEvB,aAAa;EACX,MAAM,EAAE,CAAC;EACT,IAAI,EAAE,gBAAa;EACnB,MAAM,EAAE,GAAG;EACX,KAAK,EAAE,CAAC;EACR,MAAM,EAAE,IAAI;EACZ,QAAQ,EAAE,MAAM;EAChB,OAAO,EAAE,CAAC;EACV,QAAQ,EAAE,QAAQ;;AAEpB,aAAa;EACX,SAAS,EAAE,IAAI;;;AAMX,kDAAkB;EAChB,aAAa,EAAE,MAAK;AACxB,oCAAoC;EAJtC,wBAAmB;IAKf,OAAO,EAAE,IAAI;IACb,eAAe,EAAE,UAAU;IAEzB,kDAAkB;MAChB,YAAY,EAAE,IAAI;MAClB,aAAa,EAAE,CAAC;IAEhB;;gEAAQ;MAGN,yBAAyB,EAAE,GAAG;MAC9B,sBAAsB,EAAE,GAAG;IAE7B;;+DAAQ;MAGN,0BAA0B,EAAE,GAAG;MAC/B,uBAAuB,EAAE,GAAG;IAChC;;oDAAQ;MAGN,aAAa,EAAE,CAAC;MAChB;;;;iEAAQ;QAEN,OAAO,EAAE,CAAC;MACZ;;;;;;;;gEAAQ;QAIN,OAAO,EAAE,CAAC;QACV;;;;;;;;wEAAO;UACL,OAAO,EAAE,CAAC;IAChB,6CAAa;MACX,SAAS,EAAE,CAAC;IAChB,4CAAqB;MACnB,eAAe,EAAE,MAAM;IACzB,yCAAkB;MAChB,eAAe,EAAE,QAAQ;IAEzB,sDAAQ;MACN,SAAS,EAAE,CAAC;MACZ,WAAW,EAAE,CAAC",
+"mappings": "AACE,sBAAa;EACX,aAAa,EAAE,MAAM;EACrB,wBAAC;IACC,aAAa,EAAE,MAAK;EACtB,6BAAM;IACJ,WAAW,EAAE,MAAM;EACrB,4BAAK;IACH,aAAa,EAAE,CAAC;IAChB,6CAAkB;MAChB,aAAa,EAAE,CAAC;EACpB,+BAAQ;IACN,MAAM,EAAE,OAAO;EACjB,kCAAW;IACT,SAAS,EAAE,MAAM;IACjB,WAAW,EAAE,MAAM;;AAEzB,WAAW;EACT,KAAK,EAAE,kBAAkB;EACzB,gBAAgB,EAAE,kBAAkB;EACpC,WAAW,EAAE,IAAI;;;AAInB,aAAa;EACX,WAAW,EAAE,SAAS;;AAExB,WAAW;EACT,UAAU,EAAE,IAAI;EAChB,aAAa,EAAE,IAAI;;AAErB,gBAAgB;EACd,MAAM,EAAE,YAAY;;AAEtB,iBAAiB;EACf,UAAU,EAAE,IAAI;EAChB,aAAa,EAAE,IAAI;;AAGnB,oCAAoC;EADtC,mBAAmB;IAEf,WAAW,EAAE,eAAe;IAC5B,YAAY,EAAE,eAAe;AAC/B,oCAAoC;EAJtC,mBAAmB;IAKf,WAAW,EAAE,gBAAgB;IAC7B,YAAY,EAAE,gBAAgB;;AAElC,mBAAmB;EACjB,UAAU,EAAE,IAAI;;AAElB,wBAA+D;EAC7D,MAAM,EAAE,CAAC;;AAEX,SAAS;EACP,MAAM,EAAE,iBAAkC;EAC1C,aAAa,EAAE,GAAG;EAClB,OAAO,EAAE,iBAAgB;EACzB,MAAM,EAAE,KAAK;;AAEf,oBAAoB;EAClB,MAAM,EAAE,iBAAiB;;AAE3B,OAAO;EACL,WAAW,EAAE,GAAG;EAChB,gBAAgB,EAAE,OAAO;EACzB,OAAO,EAAE,OAAM;EACf,WAAW,EAAE,MAAK;;AAEpB,YAAY;EACV,WAAW,EAAE,QAAQ;;AAEvB,aAAa;EACX,MAAM,EAAE,CAAC;EACT,IAAI,EAAE,gBAAa;EACnB,MAAM,EAAE,GAAG;EACX,KAAK,EAAE,CAAC;EACR,MAAM,EAAE,IAAI;EACZ,QAAQ,EAAE,MAAM;EAChB,OAAO,EAAE,CAAC;EACV,QAAQ,EAAE,QAAQ;;AAEpB,aAAa;EACX,SAAS,EAAE,IAAI;;;AAMX,kDAAkB;EAChB,aAAa,EAAE,MAAK;AACxB,oCAAoC;EAJtC,wBAAmB;IAKf,OAAO,EAAE,IAAI;IACb,eAAe,EAAE,UAAU;IAEzB,kDAAkB;MAChB,YAAY,EAAE,IAAI;MAClB,aAAa,EAAE,CAAC;IAEhB;;gEAAQ;MAGN,yBAAyB,EAAE,GAAG;MAC9B,sBAAsB,EAAE,GAAG;IAE7B;;+DAAQ;MAGN,0BAA0B,EAAE,GAAG;MAC/B,uBAAuB,EAAE,GAAG;IAChC;;oDAAQ;MAGN,aAAa,EAAE,CAAC;MAChB;;;;iEAAQ;QAEN,OAAO,EAAE,CAAC;MACZ;;;;;;;;gEAAQ;QAIN,OAAO,EAAE,CAAC;QACV;;;;;;;;wEAAO;UACL,OAAO,EAAE,CAAC;IAChB,6CAAa;MACX,SAAS,EAAE,CAAC;IAChB,4CAAqB;MACnB,eAAe,EAAE,MAAM;IACzB,yCAAkB;MAChB,eAAe,EAAE,QAAQ;IAEzB,sDAAQ;MACN,SAAS,EAAE,CAAC;MACZ,WAAW,EAAE,CAAC",
 "sources": ["../sass/search.sass"],
 "names": [],
 "file": "search.css"
diff --git a/search/static/sass/search.sass b/search/static/sass/search.sass
index 9402c9e5..8c01fb60 100644
--- a/search/static/sass/search.sass
+++ b/search/static/sass/search.sass
@@ -3,12 +3,17 @@
     margin-bottom: 1.25em
     p
       margin-bottom: .25em
+    .title
+      line-height: 1.25em
     .tags
       margin-bottom: 0
+      &:not(:last-child)
+        margin-bottom: 0
     span.tag
       height: 1.25rem
     .list-title
       font-size: 1.05em
+      line-height: 1.25em
 
 .search-hit
   color: #287916 !important

From 7bdf1f6acca41392db099c1a5357cfe0b7afefa1 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Thu, 20 Dec 2018 10:26:04 -0500
Subject: [PATCH 64/77] updated dependencies

---
 Dockerfile       |  2 +-
 Dockerfile-agent |  2 +-
 Dockerfile-api   |  2 +-
 Dockerfile-index |  2 +-
 Pipfile          |  4 ++--
 Pipfile.lock     | 10 +++++-----
 6 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 28de131b..ab3adbec 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,7 +3,7 @@
 # Defines the runtime for the arXiv search service, which provides the main
 # UIs (and, eventually, APIs) for search.
 
-FROM arxiv/base:0.12.1rc2
+FROM arxiv/base:0.12.1
 
 WORKDIR /opt/arxiv
 
diff --git a/Dockerfile-agent b/Dockerfile-agent
index 60051f78..ebea1c2b 100644
--- a/Dockerfile-agent
+++ b/Dockerfile-agent
@@ -4,7 +4,7 @@
 #  article metadata becomes available. Subscribes to a Kinesis stream for
 #  notifications about new metadata.
 
-FROM arxiv/search:d2a1499bc7ce3ff9866b3e3c5583f700b8d130ff
+FROM arxiv/search:0.5.1
 
 WORKDIR /opt/arxiv
 
diff --git a/Dockerfile-api b/Dockerfile-api
index 918b9f1f..edd4c7ea 100644
--- a/Dockerfile-api
+++ b/Dockerfile-api
@@ -3,7 +3,7 @@
 # Defines the runtime for the arXiv search API, which provides a metadata
 # query API backed by Elasticsearch.
 
-FROM arxiv/base:0.12.1rc2
+FROM arxiv/base:0.12.1
 
 WORKDIR /opt/arxiv
 
diff --git a/Dockerfile-index b/Dockerfile-index
index fbd048ba..daa5ba91 100644
--- a/Dockerfile-index
+++ b/Dockerfile-index
@@ -16,7 +16,7 @@
 #
 # See also ELASTICSEARCH_* and METADATA_ENDPOINT parameters, below.
 
-FROM arxiv/base:0.12.1rc2
+FROM arxiv/base:0.12.1
 
 # Add Python consumer and configuration.
 ADD requirements/prod.txt /opt/arxiv/requirements.txt
diff --git a/Pipfile b/Pipfile
index 571da1cc..1007f418 100644
--- a/Pipfile
+++ b/Pipfile
@@ -4,8 +4,8 @@ verify_ssl = true
 name = "pypi"
 
 [packages]
-arxiv-base = "==0.12.1rc2"
-arxiv-auth = "==0.1.0rc14"
+arxiv-auth = "==0.2.1"
+arxiv-base = "==0.12.1"
 boto = "==2.48.0"
 "boto3" = "==1.6.6"
 botocore = "==1.9.6"
diff --git a/Pipfile.lock b/Pipfile.lock
index 70b87fff..1341c718 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "59b8e0253b404fbf5bc25a8b9602e3ab61a9aabd1baa2a20768d000b7dda49bd"
+            "sha256": "4c3241a3b556d7e9216ac347d549203b49888c9e23f529b8281f7380760b3b78"
         },
         "pipfile-spec": 6,
         "requires": {},
@@ -16,17 +16,17 @@
     "default": {
         "arxiv-auth": {
             "hashes": [
-                "sha256:6a10165a775bdfca281a8ffac5aefe737e02b92f62c6ee95c5d793bae30dd457"
+                "sha256:19b5d5b4c226ef34986031f9f46836b5e7b44715db47b240b9af7c19a132cbb9"
             ],
             "index": "pypi",
-            "version": "==0.1.0rc14"
+            "version": "==0.2.1"
         },
         "arxiv-base": {
             "hashes": [
-                "sha256:9896b4f54d4a5e20c5f7ab7e8c65aa022781f8eba149b9730896fe22a44e0535"
+                "sha256:f8fa599e50550e0c6ee9de53030c22c0b8921fca7ac1268753a6b2b0fef177e9"
             ],
             "index": "pypi",
-            "version": "==0.12.1rc2"
+            "version": "==0.12.1"
         },
         "bleach": {
             "hashes": [

From a68f2fac4cbc05e7b73c6ef4124e919e04d67623 Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Thu, 20 Dec 2018 12:42:56 -0500
Subject: [PATCH 65/77] minor tweaks to dependencies

---
 Dockerfile-index | 13 +------------
 Pipfile          |  2 +-
 Pipfile.lock     |  6 +++---
 3 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/Dockerfile-index b/Dockerfile-index
index daa5ba91..41e9d4b8 100644
--- a/Dockerfile-index
+++ b/Dockerfile-index
@@ -16,20 +16,9 @@
 #
 # See also ELASTICSEARCH_* and METADATA_ENDPOINT parameters, below.
 
-FROM arxiv/base:0.12.1
-
-# Add Python consumer and configuration.
-ADD requirements/prod.txt /opt/arxiv/requirements.txt
-ADD app.py /opt/arxiv/
-RUN pip install -U pip
-RUN pip install -r /opt/arxiv/requirements.txt
+FROM arxiv/search:0.5.1
 
 ENV PATH "/opt/arxiv:${PATH}"
-
-ADD schema /opt/arxiv/schema
-ADD mappings /opt/arxiv/mappings
-ADD search /opt/arxiv/search
-ADD tests /opt/arxiv/tests
 ADD bulk_index.py /opt/arxiv/
 
 WORKDIR /opt/arxiv/
diff --git a/Pipfile b/Pipfile
index 1007f418..4fb12cf6 100644
--- a/Pipfile
+++ b/Pipfile
@@ -4,7 +4,7 @@ verify_ssl = true
 name = "pypi"
 
 [packages]
-arxiv-auth = "==0.2.1"
+arxiv-auth = "==0.2.3"
 arxiv-base = "==0.12.1"
 boto = "==2.48.0"
 "boto3" = "==1.6.6"
diff --git a/Pipfile.lock b/Pipfile.lock
index 1341c718..d1fe2f73 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "4c3241a3b556d7e9216ac347d549203b49888c9e23f529b8281f7380760b3b78"
+            "sha256": "2821b0070a7746b316adfe7d71044ce7f3d081c12b4370eeb84e9e34a88391e4"
         },
         "pipfile-spec": 6,
         "requires": {},
@@ -16,10 +16,10 @@
     "default": {
         "arxiv-auth": {
             "hashes": [
-                "sha256:19b5d5b4c226ef34986031f9f46836b5e7b44715db47b240b9af7c19a132cbb9"
+                "sha256:6d4a5b3e27bf718a95c79e3fef92e7a148b5aca5a0719dca7bdb300c52ce7abb"
             ],
             "index": "pypi",
-            "version": "==0.2.1"
+            "version": "==0.2.3"
         },
         "arxiv-base": {
             "hashes": [

From 3db098d44fbaa9edabb0d3b6bb706be203fde57d Mon Sep 17 00:00:00 2001
From: Jaimie Murdock <jmm698@cornell.edu>
Date: Fri, 8 Feb 2019 12:08:14 -0500
Subject: [PATCH 66/77] updating gitignore for docs

---
 .gitignore | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 49a687db..8f5e8d4d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -62,7 +62,7 @@ instance/
 .scrapy
 
 # Sphinx documentation
-docs/_build/
+docs/source/_build/
 
 # PyBuilder
 target/
@@ -97,6 +97,10 @@ ENV/
 # mkdocs documentation
 /site
 
+# VS Code
+.vscode
+settings.json
+
 # mypy
 .mypy_cache/
 
@@ -105,3 +109,4 @@ temp/
 .DS_Store
 
 to_index/
+

From 93a6e6152185dc0d08a285c08874aa9d891697cd Mon Sep 17 00:00:00 2001
From: Jaimie Murdock <jmm698@cornell.edu>
Date: Fri, 8 Feb 2019 12:38:39 -0500
Subject: [PATCH 67/77] adding update-docs script, updating intersphinx refs

---
 docs/source/conf.py | 12 ++++++------
 update-docs.sh      | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 6 deletions(-)
 create mode 100644 update-docs.sh

diff --git a/docs/source/conf.py b/docs/source/conf.py
index 5ab00a0c..b94ed978 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -183,10 +183,10 @@
 
 intersphinx_mapping = {
     'python':  ('https://docs.python.org/3.6', None),
-    'arxitecture':  ('https://cul-it.github.io/arxiv-arxitecture/', None),
-    'arxiv.taxonomy': ('https://cul-it.github.io/arxiv-base', None),
-    'arxiv.base':  ('https://cul-it.github.io/arxiv-base', None),
-    'browse':  ('https://cul-it.github.io/arxiv-browse/', None),
-    'search':  ('https://cul-it.github.io/arxiv-search/', None),
-    'zero':  ('https://cul-it.github.io/arxiv-zero/', None),
+    'arxitecture':  ('https://arXiv.github.io/arxiv-arxitecture/', None),
+    'arxiv.taxonomy': ('https://arXiv.github.io/arxiv-base', None),
+    'arxiv.base':  ('https://arXiv.github.io/arxiv-base', None),
+    'browse':  ('https://arXiv.github.io/arxiv-browse/', None),
+    'search':  ('https://arXiv.github.io/arxiv-search/', None),
+    'zero':  ('https://arXiv.github.io/arxiv-zero/', None),
 }
diff --git a/update-docs.sh b/update-docs.sh
new file mode 100644
index 00000000..619e6768
--- /dev/null
+++ b/update-docs.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+SRCDOCS=`pwd`/docs/source/_build/html
+REPO=arXiv/arxiv-search
+echo $SRCDOCS
+
+cd `pwd`/docs
+make html
+
+cd $SRCDOCS
+MSG="Adding gh-pages docs for `git log -1 --pretty=short --abbrev-commit`"
+
+TMPREPO=/tmp/docs/$REPO
+rm -rf $TMPREPO
+mkdir -p -m 0755 $TMPREPO
+echo $MSG
+
+git clone git@github.com:$REPO.git $TMPREPO
+cd $TMPREPO
+
+## Check out the gh-pages branch if it exists; otherwise create it as an orphan branch, detached from the existing history.
+if ! git checkout gh-pages; then
+    git checkout --orphan gh-pages
+    git rm -rf .
+    touch .nojekyll
+    git add .nojekyll
+else
+    git checkout gh-pages  # gh-pages was previously set up, as a one-off, to contain nothing but HTML
+fi
+
+cp -r $SRCDOCS/* $TMPREPO
+git add -A
+git commit -m "$MSG" && git push origin gh-pages

From 07624e000f373c6fdf7b87ae24a5350aa4e46a93 Mon Sep 17 00:00:00 2001
From: Martin Lessmeister <mhl10@cornell.edu>
Date: Wed, 13 Feb 2019 10:22:42 -0500
Subject: [PATCH 68/77] update help search redirect for new help pages (#227)

---
 search/controllers/simple/__init__.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/search/controllers/simple/__init__.py b/search/controllers/simple/__init__.py
index e871ada2..85f41cb5 100644
--- a/search/controllers/simple/__init__.py
+++ b/search/controllers/simple/__init__.py
@@ -110,8 +110,7 @@ def search(request_params: MultiDict,
         # Temporary workaround to support classic help search
         if form.searchtype.data == 'help':
             return {}, status.HTTP_301_MOVED_PERMANENTLY,\
-                {'Location': 'https://arxiv.org/help/search?method=and'
-                 f'&format=builtin-short&sort=score&words={form.query.data}'}
+                {'Location': f'/help/search?q={form.query.data}'}
 
         # Support classic "expeirmental" search
         elif form.searchtype.data == 'full_text':

From 4129fbbc1e785176c41f5315c419ab2afd827aed Mon Sep 17 00:00:00 2001
From: erickpeirson <brp53@cornell.edu>
Date: Mon, 25 Feb 2019 16:13:07 -0500
Subject: [PATCH 69/77] ARXIVNG-1519 tuning search API for production

---
 Dockerfile-api                 |  23 ++-----
 Pipfile                        |   2 +-
 Pipfile.lock                   | 120 ++++++++++++++++-----------------
 config/uwsgi-api.ini           |  14 ++++
 search/routes/api/serialize.py |   2 +-
 5 files changed, 78 insertions(+), 83 deletions(-)
 create mode 100644 config/uwsgi-api.ini

diff --git a/Dockerfile-api b/Dockerfile-api
index edd4c7ea..2f7e38ac 100644
--- a/Dockerfile-api
+++ b/Dockerfile-api
@@ -3,7 +3,7 @@
 # Defines the runtime for the arXiv search API, which provides a metadata
 # query API backed by Elasticsearch.
 
-FROM arxiv/base:0.12.1
+FROM arxiv/base:0.14.3
 
 WORKDIR /opt/arxiv
 
@@ -22,11 +22,7 @@ ENV PATH "/opt/arxiv:${PATH}"
 ADD schema /opt/arxiv/schema
 ADD mappings /opt/arxiv/mappings
 ADD search /opt/arxiv/search
-ADD wsgi-api.py /opt/arxiv/wsgi.py
-RUN pip install uwsgi
-
-ADD bin/start_search.sh /opt/arxiv/
-RUN chmod +x /opt/arxiv/start_search.sh
+ADD wsgi-api.py config/uwsgi-api.ini /opt/arxiv/
 
 ENV LC_ALL en_US.utf8
 ENV LANG en_US.utf8
@@ -44,16 +40,5 @@ ENV METADATA_ENDPOINT https://arxiv.org/docmeta_bulk/
 
 EXPOSE 8000
 
-#CMD /bin/bash
-ENTRYPOINT ["/opt/arxiv/start_search.sh"]
-CMD ["--http-socket", ":8000", \
-     "-M", \
-     "-t 3000", \
-     "--manage-script-name", \
-     "--processes", "8", \
-     "--threads", "1", \
-     "--async", "100", \
-     "--ugreen", \
-     "--buffer-size", "65535", \
-     "--mount", "/metadata=wsgi.py", \
-     "--logformat", "%(addr) %(addr) - %(user_id)|%(session_id) [%(rtime)] [%(uagent)] \"%(method) %(uri) %(proto)\" %(status) %(size) %(micros) %(ttfb)"]
+ENTRYPOINT ["pipenv", "run"]
+CMD ["uwsgi", "--ini", "/opt/arxiv/uwsgi-api.ini"]
diff --git a/Pipfile b/Pipfile
index 4fb12cf6..618e3d50 100644
--- a/Pipfile
+++ b/Pipfile
@@ -4,7 +4,7 @@ verify_ssl = true
 name = "pypi"
 
 [packages]
-arxiv-auth = "==0.2.3"
+arxiv-auth = "==0.2.7"
 arxiv-base = "==0.12.1"
 boto = "==2.48.0"
 "boto3" = "==1.6.6"
diff --git a/Pipfile.lock b/Pipfile.lock
index d1fe2f73..edd03da7 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "2821b0070a7746b316adfe7d71044ce7f3d081c12b4370eeb84e9e34a88391e4"
+            "sha256": "0188c2d62e4b47fb7f569e2fb04b4e5ed0215611f9a6bde046fd57eef91fb9b7"
         },
         "pipfile-spec": 6,
         "requires": {},
@@ -16,10 +16,10 @@
     "default": {
         "arxiv-auth": {
             "hashes": [
-                "sha256:6d4a5b3e27bf718a95c79e3fef92e7a148b5aca5a0719dca7bdb300c52ce7abb"
+                "sha256:34cf8fb11db111046a77fd14998b06f681c98afac79a972a663af40bc1999e41"
             ],
             "index": "pypi",
-            "version": "==0.2.3"
+            "version": "==0.2.7"
         },
         "arxiv-base": {
             "hashes": [
@@ -30,11 +30,11 @@
         },
         "bleach": {
             "hashes": [
-                "sha256:48d39675b80a75f6d1c3bdbffec791cf0bbbab665cf01e20da701c77de278718",
-                "sha256:73d26f018af5d5adcdabf5c1c974add4361a9c76af215fe32fdec8a6fc5fb9b9"
+                "sha256:213336e49e102af26d9cde77dd2d0397afabc5a6bf2fed985dc35b5d1e285a16",
+                "sha256:3fdf7f77adcf649c9911387df51254b813185e32b2c6619f690b593a617e19fa"
             ],
             "index": "pypi",
-            "version": "==3.0.2"
+            "version": "==3.1.0"
         },
         "boto": {
             "hashes": [
@@ -228,39 +228,35 @@
         },
         "lxml": {
             "hashes": [
-                "sha256:02bc220d61f46e9b9d5a53c361ef95e9f5e1d27171cd461dddb17677ae2289a5",
-                "sha256:22f253b542a342755f6cfc047fe4d3a296515cf9b542bc6e261af45a80b8caf6",
-                "sha256:2f31145c7ff665b330919bfa44aacd3a0211a76ca7e7b441039d2a0b0451e415",
-                "sha256:36720698c29e7a9626a0dc802ef8885f8f0239bfd1689628ecd459a061f2807f",
-                "sha256:438a1b0203545521f6616132bfe0f4bca86f8a401364008b30e2b26ec408ce85",
-                "sha256:4815892904c336bbaf73dafd54f45f69f4021c22b5bad7332176bbf4fb830568",
-                "sha256:5be031b0f15ad63910d8e5038b489d95a79929513b3634ad4babf77100602588",
-                "sha256:5c93ae37c3c588e829b037fdfbd64a6e40c901d3f93f7beed6d724c44829a3ad",
-                "sha256:60842230678674cdac4a1cf0f707ef12d75b9a4fc4a565add4f710b5fcf185d5",
-                "sha256:62939a8bb6758d1bf923aa1c13f0bcfa9bf5b2fc0f5fa917a6e25db5fe0cfa4e",
-                "sha256:75830c06a62fe7b8fe3bbb5f269f0b308f19f3949ac81cfd40062f47c1455faf",
-                "sha256:81992565b74332c7c1aff6a913a3e906771aa81c9d0c68c68113cffcae45bc53",
-                "sha256:8c892fb0ee52c594d9a7751c7d7356056a9682674b92cc1c4dc968ff0f30c52f",
-                "sha256:9d862e3cf4fc1f2837dedce9c42269c8c76d027e49820a548ac89fdcee1e361f",
-                "sha256:a623965c086a6e91bb703d4da62dabe59fe88888e82c4117d544e11fd74835d6",
-                "sha256:a7783ab7f6a508b0510490cef9f857b763d796ba7476d9703f89722928d1e113",
-                "sha256:aab09fbe8abfa3b9ce62aaf45aca2d28726b1b9ee44871dbe644050a2fff4940",
-                "sha256:abf181934ac3ef193832fb973fd7f6149b5c531903c2ec0f1220941d73eee601",
-                "sha256:ae07fa0c115733fce1e9da96a3ac3fa24801742ca17e917e0c79d63a01eeb843",
-                "sha256:b9c78242219f674ab645ec571c9a95d70f381319a23911941cd2358a8e0521cf",
-                "sha256:bccb267678b870d9782c3b44d0cefe3ba0e329f9af8c946d32bf3778e7a4f271",
-                "sha256:c4df4d27f4c93b2cef74579f00b1d3a31a929c7d8023f870c4b476f03a274db4",
-                "sha256:caf0e50b546bb60dfa99bb18dfa6748458a83131ecdceaf5c071d74907e7e78a",
-                "sha256:d3266bd3ac59ac4edcd5fa75165dee80b94a3e5c91049df5f7c057ccf097551c",
-                "sha256:db0d213987bcd4e6d41710fb4532b22315b0d8fb439ff901782234456556aed1",
-                "sha256:dbbd5cf7690a40a9f0a9325ab480d0fccf46d16b378eefc08e195d84299bfae1",
-                "sha256:e16e07a0ec3a75b5ee61f2b1003c35696738f937dc8148fbda9fe2147ccb6e61",
-                "sha256:e175a006725c7faadbe69e791877d09936c0ef2cf49d01b60a6c1efcb0e8be6f",
-                "sha256:edd9c13a97f6550f9da2236126bb51c092b3b1ce6187f2bd966533ad794bbb5e",
-                "sha256:fa39ea60d527fbdd94215b5e5552f1c6a912624521093f1384a491a8ad89ad8b"
-            ],
-            "index": "pypi",
-            "version": "==4.2.5"
+                "sha256:0537eee4902e8bf4f41bfee8133f7edf96533dd175930a12086d6a40d62376b2",
+                "sha256:0562ec748abd230ab87d73384e08fa784f9b9cee89e28696087d2d22c052cc27",
+                "sha256:09e91831e749fbf0f24608694e4573be0ef51430229450c39c83176cc2e2d353",
+                "sha256:1ae4c0722fc70c0d4fba43ae33c2885f705e96dce1db41f75ae14a2d2749b428",
+                "sha256:1c630c083d782cbaf1f7f37f6cac87bda9cff643cf2803a5f180f30d97955cef",
+                "sha256:2fe74e3836bd8c0fa7467ffae05545233c7f37de1eb765cacfda15ad20c6574a",
+                "sha256:37af783c2667ead34a811037bda56a0b142ac8438f7ed29ae93f82ddb812fbd6",
+                "sha256:3f2d9eafbb0b24a33f56acd16f39fc935756524dcb3172892721c54713964c70",
+                "sha256:47d8365a8ef14097aa4c65730689be51851b4ade677285a3b2daa03b37893e26",
+                "sha256:510e904079bc56ea784677348e151e1156040dbfb736f1d8ea4b9e6d0ab2d9f4",
+                "sha256:58d0851da422bba31c7f652a7e9335313cf94a641aa6d73b8f3c67602f75b593",
+                "sha256:7940d5c2185ffb989203dacbb28e6ae88b4f1bb25d04e17f94b0edd82232bcbd",
+                "sha256:7cf39bb3a905579836f7a8f3a45320d9eb22f16ab0c1e112efb940ced4d057a5",
+                "sha256:9563a23c1456c0ab550c087833bc13fcc61013a66c6420921d5b70550ea312bf",
+                "sha256:95b392952935947e0786a90b75cc33388549dcb19af716b525dae65b186138fc",
+                "sha256:983129f3fd3cef5c3cf067adcca56e30a169656c00fcc6c648629dbb850b27fa",
+                "sha256:a0b75b1f1854771844c647c464533def3e0a899dd094a85d1d4ed72ecaaee93d",
+                "sha256:b5db89cc0ef624f3a81214b7961a99f443b8c91e88188376b6b322fd10d5b118",
+                "sha256:c0a7751ba1a4bfbe7831920d98cee3ce748007eab8dfda74593d44079568219a",
+                "sha256:c0c5a7d4aafcc30c9b6d8613a362567e32e5f5b708dc41bc3a81dac56f8af8bb",
+                "sha256:d4d63d85eacc6cb37b459b16061e1f100d154bee89dc8d8f9a6128a5a538e92e",
+                "sha256:da5e7e941d6e71c9c9a717c93725cda0708c2474f532e3680ac5e39ec57d224d",
+                "sha256:dccad2b3c583f036f43f80ac99ee212c2fa9a45151358d55f13004d095e683b2",
+                "sha256:df46307d39f2aeaafa1d25309b8a8d11738b73e9861f72d4d0a092528f498baa",
+                "sha256:e70b5e1cb48828ddd2818f99b1662cb9226dc6f57d07fc75485405c77da17436",
+                "sha256:ea825562b8cd057cbc9810d496b8b5dec37a1e2fc7b27bc7c1e72ce94462a09a"
+            ],
+            "index": "pypi",
+            "version": "==4.3.1"
         },
         "markupsafe": {
             "hashes": [
@@ -295,11 +291,11 @@
         },
         "mysqlclient": {
             "hashes": [
-                "sha256:062d78953acb23066c0387a8f3bd0ecf946626f599145bb7fd201460e8f773e1",
-                "sha256:3981ae9ce545901a36a8b7aed76ed02960a429f75dc53b7ad77fb2f9ab7cd56b",
-                "sha256:b3591a00c0366de71d65108627899710d9cfb00e575c4d211aa8de59b1f130c9"
+                "sha256:425e733b05e359a714d6007c0fc44582be66b63e5a3df0a50949274ae16f4bc6",
+                "sha256:62e4770b6a797b9416bcf70488365b7d6b9c9066878108499c559293bb464380",
+                "sha256:f257d250f2675d0ef99bd318906f3cfc05cef4a2f385ea695ff32a3f04b9f9a7"
             ],
-            "version": "==1.3.14"
+            "version": "==1.4.2.post1"
         },
         "nose2": {
             "hashes": [
@@ -447,9 +443,9 @@
         },
         "sqlalchemy": {
             "hashes": [
-                "sha256:809547455d012734b4252081db1e6b4fc731de2299f3755708c39863625e1c77"
+                "sha256:8027fa183f5be466030617a497b2d64e0e16c8d615e5a34bdf9fab6f66bf4723"
             ],
-            "version": "==1.2.15"
+            "version": "==1.2.18"
         },
         "thrift": {
             "hashes": [
@@ -505,9 +501,9 @@
         },
         "uwsgi": {
             "hashes": [
-                "sha256:d2318235c74665a60021a4fc7770e9c2756f9fc07de7b8c22805efe85b5ab277"
+                "sha256:4972ac538800fb2d421027f49b4a1869b66048839507ccf0aa2fda792d99f583"
             ],
-            "version": "==2.0.17.1"
+            "version": "==2.0.18"
         },
         "webencodings": {
             "hashes": [
@@ -611,11 +607,11 @@
         },
         "coveralls": {
             "hashes": [
-                "sha256:ab638e88d38916a6cedbf80a9cd8992d5fa55c77ab755e262e00b36792b7cd6d",
-                "sha256:b2388747e2529fa4c669fb1e3e2756e4e07b6ee56c7d9fce05f35ccccc913aa0"
+                "sha256:6f213e461390973f4a97fb9e9d4ebd4956af296ff0a4d868e622108145835cb7",
+                "sha256:a7d0078c9e9b5692c03dcd3884647e837836c265c01e98094632feadef767d36"
             ],
             "index": "pypi",
-            "version": "==1.5.1"
+            "version": "==1.6.0"
         },
         "docopt": {
             "hashes": [
@@ -664,10 +660,10 @@
         },
         "packaging": {
             "hashes": [
-                "sha256:0886227f54515e592aaa2e5a553332c73962917f2831f1b0f9b9f4380a4b9807",
-                "sha256:f95a1e147590f204328170981833854229bb2912ac3d5f89e2a8ccd2834800c9"
+                "sha256:0c98a5d0be38ed775798ece1b9727178c4469d9c3b4ada66e8e6b7849f8732af",
+                "sha256:9e1cbf8c12b1f1ce0bb5344b8d7ecf66a6f8a6e91bcb0c84593ed6d3ab5c4ab3"
             ],
-            "version": "==18.0"
+            "version": "==19.0"
         },
         "pygments": {
             "hashes": [
@@ -678,10 +674,10 @@
         },
         "pyparsing": {
             "hashes": [
-                "sha256:40856e74d4987de5d01761a22d1621ae1c7f8774585acae358aa5c5936c6c90b",
-                "sha256:f353aab21fd474459d97b709e527b5571314ee5f067441dc9f88e33eecd96592"
+                "sha256:66c9268862641abcac4a96ba74506e594c884e3f57690a696d21ad8210ed667a",
+                "sha256:f6c5ef0d7480ad048c054c37632c67fca55299990fff127850181659eea33fc3"
             ],
-            "version": "==2.3.0"
+            "version": "==2.3.1"
         },
         "pytz": {
             "hashes": [
@@ -723,19 +719,19 @@
         },
         "sphinx": {
             "hashes": [
-                "sha256:120732cbddb1b2364471c3d9f8bfd4b0c5b550862f99a65736c77f970b142aea",
-                "sha256:b348790776490894e0424101af9c8413f2a86831524bd55c5f379d3e3e12ca64"
+                "sha256:b53904fa7cb4b06a39409a492b949193a1b68cc7241a1a8ce9974f86f0d24287",
+                "sha256:c1c00fc4f6e8b101a0d037065043460dffc2d507257f2f11acaed71fd2b0c83c"
             ],
             "index": "pypi",
-            "version": "==1.8.2"
+            "version": "==1.8.4"
         },
         "sphinx-autodoc-typehints": {
             "hashes": [
-                "sha256:9f546fa18ee6bfb17f5cf937805d3c8afea48b976050db0bd14bb463eee97888",
-                "sha256:adfa4712e77c795522574ce644e084cded2f6e796a59d37e7f1cb98287687100"
+                "sha256:19fe0b426b7c008181f67f816060da7f046bd8a42723f67a685d26d875bcefd7",
+                "sha256:f9c06acfec80766fe8f542a6d6a042e751fcf6ce2e2711a7dc00d8b6daf8aa36"
             ],
             "index": "pypi",
-            "version": "==1.5.2"
+            "version": "==1.6.0"
         },
         "sphinxcontrib-websupport": {
             "hashes": [
diff --git a/config/uwsgi-api.ini b/config/uwsgi-api.ini
new file mode 100644
index 00000000..091382ff
--- /dev/null
+++ b/config/uwsgi-api.ini
@@ -0,0 +1,14 @@
+[uwsgi]
+http-socket = :8000
+chdir = /opt/arxiv/
+wsgi-file = wsgi-api.py
+callable = application
+master = true
+harakiri = 3000
+manage-script-name = true
+processes = 1
+queue = 0
+threads = 1
+single-interpreter = true
+mount = /=wsgi-api.py
+logformat = "%(addr) %(addr) - %(user_id)|%(session_id) [%(rtime)] [%(uagent)] \"%(method) %(uri) %(proto)\" %(status) %(size) %(micros) %(ttfb)"
diff --git a/search/routes/api/serialize.py b/search/routes/api/serialize.py
index 910d172a..9243c51b 100644
--- a/search/routes/api/serialize.py
+++ b/search/routes/api/serialize.py
@@ -1,7 +1,7 @@
 """Serializers for API responses."""
 
 from typing import Union, Optional
-from lxml import etree
+from xml.etree import ElementTree as etree
 from flask import jsonify, url_for
 
 from arxiv import status

From bdf668659d744bded1da963234252523241becb1 Mon Sep 17 00:00:00 2001
From: eaw82 <eaw82@cornell.edu>
Date: Tue, 26 Feb 2019 15:01:26 -0500
Subject: [PATCH 70/77] ARXIVNG-1901 apply layout fixes for search results
 display

---
 Pipfile                           |  2 +-
 Pipfile.lock                      | 47 ++++++-------------------------
 search/static/css/search.css      | 11 ++++----
 search/static/css/search.css.map  |  2 +-
 search/static/sass/search.sass    | 11 ++++----
 search/templates/search/base.html |  4 +--
 6 files changed, 22 insertions(+), 55 deletions(-)

diff --git a/Pipfile b/Pipfile
index 618e3d50..711dffc6 100644
--- a/Pipfile
+++ b/Pipfile
@@ -5,7 +5,7 @@ name = "pypi"
 
 [packages]
 arxiv-auth = "==0.2.7"
-arxiv-base = "==0.12.1"
+arxiv-base = "==0.14.3"
 boto = "==2.48.0"
 "boto3" = "==1.6.6"
 botocore = "==1.9.6"
diff --git a/Pipfile.lock b/Pipfile.lock
index edd03da7..04744ba5 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "0188c2d62e4b47fb7f569e2fb04b4e5ed0215611f9a6bde046fd57eef91fb9b7"
+            "sha256": "8da6ed86a8730616858c16cc719edab981aac16cd3369fabd9082e6bb8e86d28"
         },
         "pipfile-spec": 6,
         "requires": {},
@@ -23,10 +23,10 @@
         },
         "arxiv-base": {
             "hashes": [
-                "sha256:f8fa599e50550e0c6ee9de53030c22c0b8921fca7ac1268753a6b2b0fef177e9"
+                "sha256:4c2ca540aeed0a06394e6eea12364562e2e4733d4803812f5d7542475dd55d09"
             ],
             "index": "pypi",
-            "version": "==0.12.1"
+            "version": "==0.14.3"
         },
         "bleach": {
             "hashes": [
@@ -87,17 +87,13 @@
         "coverage": {
             "hashes": [
                 "sha256:007eeef7e23f9473622f7d94a3e029a45d55a92a1f083f0f3512f5ab9a669b05",
-                "sha256:0388c12539372bb92d6dde68b4627f0300d948965bbb7fc104924d715fdc0965",
                 "sha256:079248312838c4c8f3494934ab7382a42d42d5f365f0cf7516f938dbb3f53f3f",
                 "sha256:17307429935f96c986a1b1674f78079528833410750321d22b5fb35d1883828e",
-                "sha256:1afccd7e27cac1b9617be8c769f6d8a6d363699c9b86820f40c74cfb3328921c",
                 "sha256:2ad357d12971e77360034c1596011a03f50c0f9e1ecd12e081342b8d1aee2236",
-                "sha256:2b4d7f03a8a6632598cbc5df15bbca9f778c43db7cf1a838f4fa2c8599a8691a",
                 "sha256:2e1a5c6adebb93c3b175103c2f855eda957283c10cf937d791d81bef8872d6ca",
                 "sha256:309d91bd7a35063ec7a0e4d75645488bfab3f0b66373e7722f23da7f5b0f34cc",
                 "sha256:358d635b1fc22a425444d52f26287ae5aea9e96e254ff3c59c407426f44574f4",
                 "sha256:3f4d0b3403d3e110d2588c275540649b1841725f5a11a7162620224155d00ba2",
-                "sha256:43a155eb76025c61fc20c3d03b89ca28efa6f5be572ab6110b2fb68eda96bfea",
                 "sha256:493082f104b5ca920e97a485913de254cbe351900deed72d4264571c73464cd0",
                 "sha256:4c4f368ffe1c2e7602359c2c50233269f3abe1c48ca6b288dcd0fb1d1c679733",
                 "sha256:5ff16548492e8a12e65ff3d55857ccd818584ed587a6c2898a9ebbe09a880674",
@@ -108,11 +104,8 @@
                 "sha256:845fddf89dca1e94abe168760a38271abfc2e31863fbb4ada7f9a99337d7c3dc",
                 "sha256:87d942863fe74b1c3be83a045996addf1639218c2cb89c5da18c06c0fe3917ea",
                 "sha256:9721f1b7275d3112dc7ccf63f0553c769f09b5c25a26ee45872c7f5c09edf6c1",
-                "sha256:a4497faa4f1c0fc365ba05eaecfb6b5d24e3c8c72e95938f9524e29dadb15e76",
                 "sha256:a7cfaebd8f24c2b537fa6a271229b051cdac9c1734bb6f939ccfc7c055689baa",
-                "sha256:ab3508df9a92c1d3362343d235420d08e2662969b83134f8a97dc1451cbe5e84",
                 "sha256:b0059630ca5c6b297690a6bf57bf2fdac1395c24b7935fd73ee64190276b743b",
-                "sha256:b6cebae1502ce5b87d7c6f532fa90ab345cfbda62b95aeea4e431e164d498a3d",
                 "sha256:bd4800e32b4c8d99c3a2c943f1ac430cbf80658d884123d19639bcde90dad44a",
                 "sha256:cdd92dd9471e624cd1d8c1a2703d25f114b59b736b0f1f659a98414e535ffb3d",
                 "sha256:d00e29b78ff610d300b2c37049a41234d48ea4f2d2581759ebcf67caaf731c31",
@@ -124,8 +117,7 @@
                 "sha256:f29841e865590af72c4b90d7b5b8e93fd560f5dea436c1d5ee8053788f9285de",
                 "sha256:f3a5c6d054c531536a83521c00e5d4004f1e126e2e2556ce399bef4180fbe540",
                 "sha256:f87f522bde5540d8a4b11df80058281ac38c44b13ce29ced1e294963dd51a8f8",
-                "sha256:f8c55dd0f56d3d618dfacf129e010cbe5d5f94b6951c1b2f13ab1a2f79c284da",
-                "sha256:f98b461cb59f117887aa634a66022c0bd394278245ed51189f63a036516e32de"
+                "sha256:f8c55dd0f56d3d618dfacf129e010cbe5d5f94b6951c1b2f13ab1a2f79c284da"
             ],
             "index": "pypi",
             "version": "==4.4.2"
@@ -174,8 +166,7 @@
         "flask-s3": {
             "hashes": [
                 "sha256:1d49061d4b78759df763358a901f4ed32bb43f672c9f8e1ec7226793f6ae0fd2",
-                "sha256:23cbbb1db4c29c313455dbe16f25be078d6318f0a11abcbb610f99e116945b62",
-                "sha256:d6e1fc3834f0be74c17e26bb8d0f506f711eb888775ab6af9164c0abb6f4c97c"
+                "sha256:23cbbb1db4c29c313455dbe16f25be078d6318f0a11abcbb610f99e116945b62"
             ],
             "index": "pypi",
             "version": "==0.3.3"
@@ -384,15 +375,8 @@
         },
         "pytz": {
             "hashes": [
-                "sha256:59707844a9825589878236ff2f4e0dc9958511b7ffaae94dc615da07d4a68d33",
-                "sha256:699d18a2a56f19ee5698ab1123bbcc1d269d061996aeb1eda6d89248d3542b82",
-                "sha256:80af0f3008046b9975242012a985f04c5df1f01eed4ec1633d56cc47a75a6a48",
-                "sha256:8cc90340159b5d7ced6f2ba77694d946fc975b09f1a51d93f3ce3bb399396f94",
                 "sha256:c41c62827ce9cafacd6f2f7018e4f83a6f1986e87bfd000b8cfbd4ab5da95f1a",
-                "sha256:d0ef5ef55ed3d37854320d4926b04a4cb42a2e88f71da9ddfdacfde8e364f027",
-                "sha256:dd2e4ca6ce3785c8dd342d1853dd9052b19290d5bf66060846e5dc6b8d6667f7",
-                "sha256:fae4cffc040921b8a2d60c6cf0b5d662c1190fe54d718271db4eb17d44a185b7",
-                "sha256:feb2365914948b8620347784b6b6da356f31c9d03560259070b2f30cff3d469d"
+                "sha256:fae4cffc040921b8a2d60c6cf0b5d662c1190fe54d718271db4eb17d44a185b7"
             ],
             "index": "pypi",
             "version": "==2017.3"
@@ -562,17 +546,13 @@
         "coverage": {
             "hashes": [
                 "sha256:007eeef7e23f9473622f7d94a3e029a45d55a92a1f083f0f3512f5ab9a669b05",
-                "sha256:0388c12539372bb92d6dde68b4627f0300d948965bbb7fc104924d715fdc0965",
                 "sha256:079248312838c4c8f3494934ab7382a42d42d5f365f0cf7516f938dbb3f53f3f",
                 "sha256:17307429935f96c986a1b1674f78079528833410750321d22b5fb35d1883828e",
-                "sha256:1afccd7e27cac1b9617be8c769f6d8a6d363699c9b86820f40c74cfb3328921c",
                 "sha256:2ad357d12971e77360034c1596011a03f50c0f9e1ecd12e081342b8d1aee2236",
-                "sha256:2b4d7f03a8a6632598cbc5df15bbca9f778c43db7cf1a838f4fa2c8599a8691a",
                 "sha256:2e1a5c6adebb93c3b175103c2f855eda957283c10cf937d791d81bef8872d6ca",
                 "sha256:309d91bd7a35063ec7a0e4d75645488bfab3f0b66373e7722f23da7f5b0f34cc",
                 "sha256:358d635b1fc22a425444d52f26287ae5aea9e96e254ff3c59c407426f44574f4",
                 "sha256:3f4d0b3403d3e110d2588c275540649b1841725f5a11a7162620224155d00ba2",
-                "sha256:43a155eb76025c61fc20c3d03b89ca28efa6f5be572ab6110b2fb68eda96bfea",
                 "sha256:493082f104b5ca920e97a485913de254cbe351900deed72d4264571c73464cd0",
                 "sha256:4c4f368ffe1c2e7602359c2c50233269f3abe1c48ca6b288dcd0fb1d1c679733",
                 "sha256:5ff16548492e8a12e65ff3d55857ccd818584ed587a6c2898a9ebbe09a880674",
@@ -583,11 +563,8 @@
                 "sha256:845fddf89dca1e94abe168760a38271abfc2e31863fbb4ada7f9a99337d7c3dc",
                 "sha256:87d942863fe74b1c3be83a045996addf1639218c2cb89c5da18c06c0fe3917ea",
                 "sha256:9721f1b7275d3112dc7ccf63f0553c769f09b5c25a26ee45872c7f5c09edf6c1",
-                "sha256:a4497faa4f1c0fc365ba05eaecfb6b5d24e3c8c72e95938f9524e29dadb15e76",
                 "sha256:a7cfaebd8f24c2b537fa6a271229b051cdac9c1734bb6f939ccfc7c055689baa",
-                "sha256:ab3508df9a92c1d3362343d235420d08e2662969b83134f8a97dc1451cbe5e84",
                 "sha256:b0059630ca5c6b297690a6bf57bf2fdac1395c24b7935fd73ee64190276b743b",
-                "sha256:b6cebae1502ce5b87d7c6f532fa90ab345cfbda62b95aeea4e431e164d498a3d",
                 "sha256:bd4800e32b4c8d99c3a2c943f1ac430cbf80658d884123d19639bcde90dad44a",
                 "sha256:cdd92dd9471e624cd1d8c1a2703d25f114b59b736b0f1f659a98414e535ffb3d",
                 "sha256:d00e29b78ff610d300b2c37049a41234d48ea4f2d2581759ebcf67caaf731c31",
@@ -599,8 +576,7 @@
                 "sha256:f29841e865590af72c4b90d7b5b8e93fd560f5dea436c1d5ee8053788f9285de",
                 "sha256:f3a5c6d054c531536a83521c00e5d4004f1e126e2e2556ce399bef4180fbe540",
                 "sha256:f87f522bde5540d8a4b11df80058281ac38c44b13ce29ced1e294963dd51a8f8",
-                "sha256:f8c55dd0f56d3d618dfacf129e010cbe5d5f94b6951c1b2f13ab1a2f79c284da",
-                "sha256:f98b461cb59f117887aa634a66022c0bd394278245ed51189f63a036516e32de"
+                "sha256:f8c55dd0f56d3d618dfacf129e010cbe5d5f94b6951c1b2f13ab1a2f79c284da"
             ],
             "index": "pypi",
             "version": "==4.4.2"
@@ -681,15 +657,8 @@
         },
         "pytz": {
             "hashes": [
-                "sha256:59707844a9825589878236ff2f4e0dc9958511b7ffaae94dc615da07d4a68d33",
-                "sha256:699d18a2a56f19ee5698ab1123bbcc1d269d061996aeb1eda6d89248d3542b82",
-                "sha256:80af0f3008046b9975242012a985f04c5df1f01eed4ec1633d56cc47a75a6a48",
-                "sha256:8cc90340159b5d7ced6f2ba77694d946fc975b09f1a51d93f3ce3bb399396f94",
                 "sha256:c41c62827ce9cafacd6f2f7018e4f83a6f1986e87bfd000b8cfbd4ab5da95f1a",
-                "sha256:d0ef5ef55ed3d37854320d4926b04a4cb42a2e88f71da9ddfdacfde8e364f027",
-                "sha256:dd2e4ca6ce3785c8dd342d1853dd9052b19290d5bf66060846e5dc6b8d6667f7",
-                "sha256:fae4cffc040921b8a2d60c6cf0b5d662c1190fe54d718271db4eb17d44a185b7",
-                "sha256:feb2365914948b8620347784b6b6da356f31c9d03560259070b2f30cff3d469d"
+                "sha256:fae4cffc040921b8a2d60c6cf0b5d662c1190fe54d718271db4eb17d44a185b7"
             ],
             "index": "pypi",
             "version": "==2017.3"
diff --git a/search/static/css/search.css b/search/static/css/search.css
index 6095ded5..59cdb1c5 100644
--- a/search/static/css/search.css
+++ b/search/static/css/search.css
@@ -29,6 +29,11 @@
 
 .pagination-list {
   margin: 0 !important; }
+  .pagination-list li {
+    list-style: none;
+    margin-bottom: 0; }
+  .pagination-list li + li {
+    margin: 0; }
 
 .breathe-vertical {
   margin-top: 4rem;
@@ -43,12 +48,6 @@
     margin-left: 10rem !important;
     margin-right: 10rem !important; } }
 
-.pagination-list li {
-  list-style: none; }
-
-.pagination-list li + li {
-  margin: 0; }
-
 .fieldset {
   border: 1px solid #DBDBDB;
   border-radius: 2px;
diff --git a/search/static/css/search.css.map b/search/static/css/search.css.map
index 283b7c5f..cb24a66d 100644
--- a/search/static/css/search.css.map
+++ b/search/static/css/search.css.map
@@ -1,6 +1,6 @@
 {
 "version": 3,
-"mappings": "AACE,sBAAa;EACX,aAAa,EAAE,MAAM;EACrB,wBAAC;IACC,aAAa,EAAE,MAAK;EACtB,6BAAM;IACJ,WAAW,EAAE,MAAM;EACrB,4BAAK;IACH,aAAa,EAAE,CAAC;IAChB,6CAAkB;MAChB,aAAa,EAAE,CAAC;EACpB,+BAAQ;IACN,MAAM,EAAE,OAAO;EACjB,kCAAW;IACT,SAAS,EAAE,MAAM;IACjB,WAAW,EAAE,MAAM;;AAEzB,WAAW;EACT,KAAK,EAAE,kBAAkB;EACzB,gBAAgB,EAAE,kBAAkB;EACpC,WAAW,EAAE,IAAI;;;AAInB,aAAa;EACX,WAAW,EAAE,SAAS;;AAExB,WAAW;EACT,UAAU,EAAE,IAAI;EAChB,aAAa,EAAE,IAAI;;AAErB,gBAAgB;EACd,MAAM,EAAE,YAAY;;AAEtB,iBAAiB;EACf,UAAU,EAAE,IAAI;EAChB,aAAa,EAAE,IAAI;;AAGnB,oCAAoC;EADtC,mBAAmB;IAEf,WAAW,EAAE,eAAe;IAC5B,YAAY,EAAE,eAAe;AAC/B,oCAAoC;EAJtC,mBAAmB;IAKf,WAAW,EAAE,gBAAgB;IAC7B,YAAY,EAAE,gBAAgB;;AAElC,mBAAmB;EACjB,UAAU,EAAE,IAAI;;AAElB,wBAA+D;EAC7D,MAAM,EAAE,CAAC;;AAEX,SAAS;EACP,MAAM,EAAE,iBAAkC;EAC1C,aAAa,EAAE,GAAG;EAClB,OAAO,EAAE,iBAAgB;EACzB,MAAM,EAAE,KAAK;;AAEf,oBAAoB;EAClB,MAAM,EAAE,iBAAiB;;AAE3B,OAAO;EACL,WAAW,EAAE,GAAG;EAChB,gBAAgB,EAAE,OAAO;EACzB,OAAO,EAAE,OAAM;EACf,WAAW,EAAE,MAAK;;AAEpB,YAAY;EACV,WAAW,EAAE,QAAQ;;AAEvB,aAAa;EACX,MAAM,EAAE,CAAC;EACT,IAAI,EAAE,gBAAa;EACnB,MAAM,EAAE,GAAG;EACX,KAAK,EAAE,CAAC;EACR,MAAM,EAAE,IAAI;EACZ,QAAQ,EAAE,MAAM;EAChB,OAAO,EAAE,CAAC;EACV,QAAQ,EAAE,QAAQ;;AAEpB,aAAa;EACX,SAAS,EAAE,IAAI;;;AAMX,kDAAkB;EAChB,aAAa,EAAE,MAAK;AACxB,oCAAoC;EAJtC,wBAAmB;IAKf,OAAO,EAAE,IAAI;IACb,eAAe,EAAE,UAAU;IAEzB,kDAAkB;MAChB,YAAY,EAAE,IAAI;MAClB,aAAa,EAAE,CAAC;IAEhB;;gEAAQ;MAGN,yBAAyB,EAAE,GAAG;MAC9B,sBAAsB,EAAE,GAAG;IAE7B;;+DAAQ;MAGN,0BAA0B,EAAE,GAAG;MAC/B,uBAAuB,EAAE,GAAG;IAChC;;oDAAQ;MAGN,aAAa,EAAE,CAAC;MAChB;;;;iEAAQ;QAEN,OAAO,EAAE,CAAC;MACZ;;;;;;;;gEAAQ;QAIN,OAAO,EAAE,CAAC;QACV;;;;;;;;wEAAO;UACL,OAAO,EAAE,CAAC;IAChB,6CAAa;MACX,SAAS,EAAE,CAAC;IAChB,4CAAqB;MACnB,eAAe,EAAE,MAAM;IACzB,yCAAkB;MAChB,eAAe,EAAE,QAAQ;IAEzB,sDAAQ;MACN,SAAS,EAAE,CAAC;MACZ,WAAW,EAAE,CAAC",
+"mappings": "AACE,sBAAa;EACX,aAAa,EAAE,MAAM;EACrB,wBAAC;IACC,aAAa,EAAE,MAAK;EACtB,6BAAM;IACJ,WAAW,EAAE,MAAM;EACrB,4BAAK;IACH,aAAa,EAAE,CAAC;IAChB,6CAAkB;MAChB,aAAa,EAAE,CAAC;EACpB,+BAAQ;IACN,MAAM,EAAE,OAAO;EACjB,kCAAW;IACT,SAAS,EAAE,MAAM;IACjB,WAAW,EAAE,MAAM;;AAEzB,WAAW;EACT,KAAK,EAAE,kBAAkB;EACzB,gBAAgB,EAAE,kBAAkB;EACpC,WAAW,EAAE,IAAI;;;AAInB,aAAa;EACX,WAAW,EAAE,SAAS;;AAExB,WAAW;EACT,UAAU,EAAE,IAAI;EAChB,aAAa,EAAE,IAAI;;AAErB,gBAAgB;EACd,MAAM,EAAE,YAAY;EACpB,mBAAE;IACA,UAAU,EAAE,IAAI;IAChB,aAAa,EAAE,CAAC;EAClB,wBAA+C;IAC7C,MAAM,EAAE,CAAC;;AAEb,iBAAiB;EACf,UAAU,EAAE,IAAI;EAChB,aAAa,EAAE,IAAI;;AAGnB,oCAAoC;EADtC,mBAAmB;IAEf,WAAW,EAAE,eAAe;IAC5B,YAAY,EAAE,eAAe;AAC/B,oCAAoC;EAJtC,mBAAmB;IAKf,WAAW,EAAE,gBAAgB;IAC7B,YAAY,EAAE,gBAAgB;;AAElC,SAAS;EACP,MAAM,EAAE,iBAAkC;EAC1C,aAAa,EAAE,GAAG;EAClB,OAAO,EAAE,iBAAgB;EACzB,MAAM,EAAE,KAAK;;AAEf,oBAAoB;EAClB,MAAM,EAAE,iBAAiB;;AAE3B,OAAO;EACL,WAAW,EAAE,GAAG;EAChB,gBAAgB,EAAE,OAAO;EACzB,OAAO,EAAE,OAAM;EACf,WAAW,EAAE,MAAK;;AAEpB,YAAY;EACV,WAAW,EAAE,QAAQ;;AAEvB,aAAa;EACX,MAAM,EAAE,CAAC;EACT,IAAI,EAAE,gBAAa;EACnB,MAAM,EAAE,GAAG;EACX,KAAK,EAAE,CAAC;EACR,MAAM,EAAE,IAAI;EACZ,QAAQ,EAAE,MAAM;EAChB,OAAO,EAAE,CAAC;EACV,QAAQ,EAAE,QAAQ;;AAEpB,aAAa;EACX,SAAS,EAAE,IAAI;;;AAMX,kDAAkB;EAChB,aAAa,EAAE,MAAK;AACxB,oCAAoC;EAJtC,wBAAmB;IAKf,OAAO,EAAE,IAAI;IACb,eAAe,EAAE,UAAU;IAEzB,kDAAkB;MAChB,YAAY,EAAE,IAAI;MAClB,aAAa,EAAE,CAAC;IAEhB;;gEAAQ;MAGN,yBAAyB,EAAE,GAAG;MAC9B,sBAAsB,EAAE,GAAG;IAE7B;;+DAAQ;MAGN,0BAA0B,EAAE,GAAG;MAC/B,uBAAuB,EAAE,GAAG;IAChC;;oDAAQ;MAGN,aAAa,EAAE,CAAC;MAChB;;;;iEAAQ;QAEN,OAAO,EAAE,CAAC;MACZ;;;;;;;;gEAAQ;QAIN,OAAO,EAAE,CAAC;QACV;;;;;;;;wEAAO;UACL,OAAO,EAAE,CAAC;IAChB,6CAAa;MACX,SAAS,EAAE,CAAC;IAChB,4CAAqB;MACnB,eAAe,EAAE,MAAM;IACzB,yCAAkB;MAChB,eAAe,EAAE,QAAQ;IAEzB,sDAAQ;MACN,SAAS,EAAE,CAAC;MACZ,WAAW,EAAE,CAAC",
 "sources": ["../sass/search.sass"],
 "names": [],
 "file": "search.css"
diff --git a/search/static/sass/search.sass b/search/static/sass/search.sass
index 8c01fb60..ea5a78cb 100644
--- a/search/static/sass/search.sass
+++ b/search/static/sass/search.sass
@@ -31,6 +31,11 @@
 
 .pagination-list
   margin: 0 !important
+  li
+    list-style: none
+    margin-bottom: 0
+  li + li  // fixes override from content li + li
+    margin: 0
 
 .breathe-vertical
   margin-top: 4rem
@@ -44,12 +49,6 @@
     margin-left: 10rem !important
     margin-right: 10rem !important
 
-.pagination-list li
-  list-style: none
-
-.pagination-list li + li // fixes override from content li + li
-  margin: 0
-
 .fieldset
   border: 1px solid #DBDBDB // $grey-lighter
   border-radius: 2px // $radius-small
diff --git a/search/templates/search/base.html b/search/templates/search/base.html
index bf43ffda..de259d43 100644
--- a/search/templates/search/base.html
+++ b/search/templates/search/base.html
@@ -47,7 +47,7 @@ <h1 class="title is-clearfix">{% block title %}Search{% endblock title %}</h1>
       <button class="button is-small" id="feedback-button">Feedback?</button>
     </div>
   </div>
-    <content>
+    <div class="content">
       {% block within_content %}
         Specific results here
       {% endblock within_content %}
@@ -56,5 +56,5 @@ <h1 class="title is-clearfix">{% block title %}Search{% endblock title %}</h1>
         <span class="help" style="display: inline-block;"><a href="{{ config.RELEASE_NOTES_URL }}">{{ config.RELEASE_NOTES_TEXT }}</a>&nbsp;&nbsp;</span>
         <button class="button is-small" id="feedback-button">Feedback?</button>
       </div>
-    </content>
+    </div>
 {% endblock content %}

From aa10e301f02572237a4c771a091a2fd3e482f21c Mon Sep 17 00:00:00 2001
From: Jaimie Murdock <jmm698@cornell.edu>
Date: Wed, 6 Mar 2019 16:01:44 -0500
Subject: [PATCH 71/77] adding start and size to docs, adjusting MAXIMUM_size
 to match old API

---
 schema/search.yaml    | 20 ++++++++++++++++++++
 search/domain/base.py |  2 +-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/schema/search.yaml b/schema/search.yaml
index 8f6d3306..0e8fc087 100644
--- a/schema/search.yaml
+++ b/schema/search.yaml
@@ -399,6 +399,26 @@ paths:
               - submitted_date
               - announced_date_first
           example: submitted_date_first
+        
+        - name: start
+          in: query
+          description: |
+            Defines the index of the first returned result, using 0-based indexing.
+          required: false
+          schema:
+            type: integer
+          example: 0
+
+        - name: size
+          in: query
+          description: |
+            The number of results returned by the query. Used in conjunction with "start" for 
+            pagination. Size is limited to 30000, in slices of 2000.
+          required: false
+          schema:
+            type: integer
+          example: 10
+        
 
       responses:
         '200':
diff --git a/search/domain/base.py b/search/domain/base.py
index 57a203fe..fc074941 100644
--- a/search/domain/base.py
+++ b/search/domain/base.py
@@ -196,7 +196,7 @@ def __str__(self) -> str:
 class Query:
     """Represents a search query originating from the UI or API."""
 
-    MAXIMUM_size = 500
+    MAXIMUM_size = 2000
     """The maximum number of records that can be retrieved."""
 
     SUPPORTED_FIELDS = [

From 34c32b95eaae16f732457c2b4d92019bd171efe5 Mon Sep 17 00:00:00 2001
From: Martin Lessmeister <mhl10@cornell.edu>
Date: Thu, 7 Mar 2019 09:29:01 -0500
Subject: [PATCH 72/77] upgrade arxiv-base to 0.15.1 which includes four-column
 footer

---
 Pipfile      |   2 +-
 Pipfile.lock | 109 +++++++++++++++++++++++++++++++++------------------
 2 files changed, 71 insertions(+), 40 deletions(-)

diff --git a/Pipfile b/Pipfile
index 711dffc6..07378ed1 100644
--- a/Pipfile
+++ b/Pipfile
@@ -5,7 +5,7 @@ name = "pypi"
 
 [packages]
 arxiv-auth = "==0.2.7"
-arxiv-base = "==0.14.3"
+arxiv-base = "==0.15.1"
 boto = "==2.48.0"
 "boto3" = "==1.6.6"
 botocore = "==1.9.6"
diff --git a/Pipfile.lock b/Pipfile.lock
index 04744ba5..5a12f1c4 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "8da6ed86a8730616858c16cc719edab981aac16cd3369fabd9082e6bb8e86d28"
+            "sha256": "f774d00518c8a0b478a50244f924078d9a8e11340a7ce4eaf13439f0be81c72d"
         },
         "pipfile-spec": 6,
         "requires": {},
@@ -23,10 +23,10 @@
         },
         "arxiv-base": {
             "hashes": [
-                "sha256:4c2ca540aeed0a06394e6eea12364562e2e4733d4803812f5d7542475dd55d09"
+                "sha256:9f068f24567fb7ebebcc8fe05b98589c045ec7926441183309f468212d94aa2a"
             ],
             "index": "pypi",
-            "version": "==0.14.3"
+            "version": "==0.15.1"
         },
         "bleach": {
             "hashes": [
@@ -87,13 +87,17 @@
         "coverage": {
             "hashes": [
                 "sha256:007eeef7e23f9473622f7d94a3e029a45d55a92a1f083f0f3512f5ab9a669b05",
+                "sha256:0388c12539372bb92d6dde68b4627f0300d948965bbb7fc104924d715fdc0965",
                 "sha256:079248312838c4c8f3494934ab7382a42d42d5f365f0cf7516f938dbb3f53f3f",
                 "sha256:17307429935f96c986a1b1674f78079528833410750321d22b5fb35d1883828e",
+                "sha256:1afccd7e27cac1b9617be8c769f6d8a6d363699c9b86820f40c74cfb3328921c",
                 "sha256:2ad357d12971e77360034c1596011a03f50c0f9e1ecd12e081342b8d1aee2236",
+                "sha256:2b4d7f03a8a6632598cbc5df15bbca9f778c43db7cf1a838f4fa2c8599a8691a",
                 "sha256:2e1a5c6adebb93c3b175103c2f855eda957283c10cf937d791d81bef8872d6ca",
                 "sha256:309d91bd7a35063ec7a0e4d75645488bfab3f0b66373e7722f23da7f5b0f34cc",
                 "sha256:358d635b1fc22a425444d52f26287ae5aea9e96e254ff3c59c407426f44574f4",
                 "sha256:3f4d0b3403d3e110d2588c275540649b1841725f5a11a7162620224155d00ba2",
+                "sha256:43a155eb76025c61fc20c3d03b89ca28efa6f5be572ab6110b2fb68eda96bfea",
                 "sha256:493082f104b5ca920e97a485913de254cbe351900deed72d4264571c73464cd0",
                 "sha256:4c4f368ffe1c2e7602359c2c50233269f3abe1c48ca6b288dcd0fb1d1c679733",
                 "sha256:5ff16548492e8a12e65ff3d55857ccd818584ed587a6c2898a9ebbe09a880674",
@@ -104,8 +108,11 @@
                 "sha256:845fddf89dca1e94abe168760a38271abfc2e31863fbb4ada7f9a99337d7c3dc",
                 "sha256:87d942863fe74b1c3be83a045996addf1639218c2cb89c5da18c06c0fe3917ea",
                 "sha256:9721f1b7275d3112dc7ccf63f0553c769f09b5c25a26ee45872c7f5c09edf6c1",
+                "sha256:a4497faa4f1c0fc365ba05eaecfb6b5d24e3c8c72e95938f9524e29dadb15e76",
                 "sha256:a7cfaebd8f24c2b537fa6a271229b051cdac9c1734bb6f939ccfc7c055689baa",
+                "sha256:ab3508df9a92c1d3362343d235420d08e2662969b83134f8a97dc1451cbe5e84",
                 "sha256:b0059630ca5c6b297690a6bf57bf2fdac1395c24b7935fd73ee64190276b743b",
+                "sha256:b6cebae1502ce5b87d7c6f532fa90ab345cfbda62b95aeea4e431e164d498a3d",
                 "sha256:bd4800e32b4c8d99c3a2c943f1ac430cbf80658d884123d19639bcde90dad44a",
                 "sha256:cdd92dd9471e624cd1d8c1a2703d25f114b59b736b0f1f659a98414e535ffb3d",
                 "sha256:d00e29b78ff610d300b2c37049a41234d48ea4f2d2581759ebcf67caaf731c31",
@@ -117,7 +124,8 @@
                 "sha256:f29841e865590af72c4b90d7b5b8e93fd560f5dea436c1d5ee8053788f9285de",
                 "sha256:f3a5c6d054c531536a83521c00e5d4004f1e126e2e2556ce399bef4180fbe540",
                 "sha256:f87f522bde5540d8a4b11df80058281ac38c44b13ce29ced1e294963dd51a8f8",
-                "sha256:f8c55dd0f56d3d618dfacf129e010cbe5d5f94b6951c1b2f13ab1a2f79c284da"
+                "sha256:f8c55dd0f56d3d618dfacf129e010cbe5d5f94b6951c1b2f13ab1a2f79c284da",
+                "sha256:f98b461cb59f117887aa634a66022c0bd394278245ed51189f63a036516e32de"
             ],
             "index": "pypi",
             "version": "==4.4.2"
@@ -166,7 +174,8 @@
         "flask-s3": {
             "hashes": [
                 "sha256:1d49061d4b78759df763358a901f4ed32bb43f672c9f8e1ec7226793f6ae0fd2",
-                "sha256:23cbbb1db4c29c313455dbe16f25be078d6318f0a11abcbb610f99e116945b62"
+                "sha256:23cbbb1db4c29c313455dbe16f25be078d6318f0a11abcbb610f99e116945b62",
+                "sha256:d6e1fc3834f0be74c17e26bb8d0f506f711eb888775ab6af9164c0abb6f4c97c"
             ],
             "index": "pypi",
             "version": "==0.3.3"
@@ -219,35 +228,35 @@
         },
         "lxml": {
             "hashes": [
-                "sha256:0537eee4902e8bf4f41bfee8133f7edf96533dd175930a12086d6a40d62376b2",
-                "sha256:0562ec748abd230ab87d73384e08fa784f9b9cee89e28696087d2d22c052cc27",
-                "sha256:09e91831e749fbf0f24608694e4573be0ef51430229450c39c83176cc2e2d353",
-                "sha256:1ae4c0722fc70c0d4fba43ae33c2885f705e96dce1db41f75ae14a2d2749b428",
-                "sha256:1c630c083d782cbaf1f7f37f6cac87bda9cff643cf2803a5f180f30d97955cef",
-                "sha256:2fe74e3836bd8c0fa7467ffae05545233c7f37de1eb765cacfda15ad20c6574a",
-                "sha256:37af783c2667ead34a811037bda56a0b142ac8438f7ed29ae93f82ddb812fbd6",
-                "sha256:3f2d9eafbb0b24a33f56acd16f39fc935756524dcb3172892721c54713964c70",
-                "sha256:47d8365a8ef14097aa4c65730689be51851b4ade677285a3b2daa03b37893e26",
-                "sha256:510e904079bc56ea784677348e151e1156040dbfb736f1d8ea4b9e6d0ab2d9f4",
-                "sha256:58d0851da422bba31c7f652a7e9335313cf94a641aa6d73b8f3c67602f75b593",
-                "sha256:7940d5c2185ffb989203dacbb28e6ae88b4f1bb25d04e17f94b0edd82232bcbd",
-                "sha256:7cf39bb3a905579836f7a8f3a45320d9eb22f16ab0c1e112efb940ced4d057a5",
-                "sha256:9563a23c1456c0ab550c087833bc13fcc61013a66c6420921d5b70550ea312bf",
-                "sha256:95b392952935947e0786a90b75cc33388549dcb19af716b525dae65b186138fc",
-                "sha256:983129f3fd3cef5c3cf067adcca56e30a169656c00fcc6c648629dbb850b27fa",
-                "sha256:a0b75b1f1854771844c647c464533def3e0a899dd094a85d1d4ed72ecaaee93d",
-                "sha256:b5db89cc0ef624f3a81214b7961a99f443b8c91e88188376b6b322fd10d5b118",
-                "sha256:c0a7751ba1a4bfbe7831920d98cee3ce748007eab8dfda74593d44079568219a",
-                "sha256:c0c5a7d4aafcc30c9b6d8613a362567e32e5f5b708dc41bc3a81dac56f8af8bb",
-                "sha256:d4d63d85eacc6cb37b459b16061e1f100d154bee89dc8d8f9a6128a5a538e92e",
-                "sha256:da5e7e941d6e71c9c9a717c93725cda0708c2474f532e3680ac5e39ec57d224d",
-                "sha256:dccad2b3c583f036f43f80ac99ee212c2fa9a45151358d55f13004d095e683b2",
-                "sha256:df46307d39f2aeaafa1d25309b8a8d11738b73e9861f72d4d0a092528f498baa",
-                "sha256:e70b5e1cb48828ddd2818f99b1662cb9226dc6f57d07fc75485405c77da17436",
-                "sha256:ea825562b8cd057cbc9810d496b8b5dec37a1e2fc7b27bc7c1e72ce94462a09a"
-            ],
-            "index": "pypi",
-            "version": "==4.3.1"
+                "sha256:0358b9e9642bc7d39aac5cffe9884a99a5ca68e5e2c1b89e570ed60da9139908",
+                "sha256:091a359c4dafebbecd3959d9013f1b896b5371859165e4e50b01607a98d9e3e2",
+                "sha256:1998e4e60603c64bcc35af61b4331ab3af087457900d3980e18d190e17c3a697",
+                "sha256:2000b4088dee9a41f459fddaf6609bba48a435ce6374bb254c5ccdaa8928c5ba",
+                "sha256:2afb0064780d8aaf165875be5898c1866766e56175714fa5f9d055433e92d41d",
+                "sha256:2d8f1d9334a4e3ff176d096c14ded3100547d73440683567d85b8842a53180bb",
+                "sha256:2e38db22f6a3199fd63675e1b4bd795d676d906869047398f29f38ca55cb453a",
+                "sha256:3181f84649c1a1ca62b19ddf28436b1b2cb05ae6c7d2628f33872e713994c364",
+                "sha256:37462170dfd88af8431d04de6b236e6e9c06cda71e2ca26d88ef2332fd2a5237",
+                "sha256:3a9d8521c89bf6f2a929c3d12ad3ad7392c774c327ea809fd08a13be6b3bc05f",
+                "sha256:3d0bbd2e1a28b4429f24fd63a122a450ce9edb7a8063d070790092d7343a1aa4",
+                "sha256:483d60585ce3ee71929cea70949059f83850fa5e12deb9c094ed1c8c2ec73cbd",
+                "sha256:4888be27d5cba55ce94209baef5bcd7bbd7314a3d17021a5fc10000b3a5f737d",
+                "sha256:64b0d62e4209170a2a0c404c446ab83b941a0003e96604d2e4f4cb735f8a2254",
+                "sha256:68010900898fdf139ac08549c4dba8206c584070a960ffc530aebf0c6f2794ef",
+                "sha256:872ecb066de602a0099db98bd9e57f4cfc1d62f6093d94460c787737aa08f39e",
+                "sha256:88a32b03f2e4cd0e63f154cac76724709f40b3fc2f30139eb5d6f900521b44ed",
+                "sha256:b1dc7683da4e67ab2bebf266afa68098d681ae02ce570f0d1117312273d2b2ac",
+                "sha256:b29e27ce9371810250cb1528a771d047a9c7b0f79630dc7dc5815ff828f4273b",
+                "sha256:ce197559596370d985f1ce6b7051b52126849d8159040293bf8b98cb2b3e1f78",
+                "sha256:d45cf6daaf22584eff2175f48f82c4aa24d8e72a44913c5aff801819bb73d11f",
+                "sha256:e2ff9496322b2ce947ba4a7a5eb048158de9d6f3fe9efce29f1e8dd6878561e6",
+                "sha256:f7b979518ec1f294a41a707c007d54d0f3b3e1fd15d5b26b7e99b62b10d9a72e",
+                "sha256:f9c7268e9d16e34e50f8246c4f24cf7353764affd2bc971f0379514c246e3f6b",
+                "sha256:f9c839806089d79de588ee1dde2dae05dc1156d3355dfeb2b51fde84d9c960ad",
+                "sha256:ff962953e2389226adc4d355e34a98b0b800984399153c6678f2367b11b4d4b8"
+            ],
+            "index": "pypi",
+            "version": "==4.3.2"
         },
         "markupsafe": {
             "hashes": [
@@ -375,8 +384,15 @@
         },
         "pytz": {
             "hashes": [
+                "sha256:59707844a9825589878236ff2f4e0dc9958511b7ffaae94dc615da07d4a68d33",
+                "sha256:699d18a2a56f19ee5698ab1123bbcc1d269d061996aeb1eda6d89248d3542b82",
+                "sha256:80af0f3008046b9975242012a985f04c5df1f01eed4ec1633d56cc47a75a6a48",
+                "sha256:8cc90340159b5d7ced6f2ba77694d946fc975b09f1a51d93f3ce3bb399396f94",
                 "sha256:c41c62827ce9cafacd6f2f7018e4f83a6f1986e87bfd000b8cfbd4ab5da95f1a",
-                "sha256:fae4cffc040921b8a2d60c6cf0b5d662c1190fe54d718271db4eb17d44a185b7"
+                "sha256:d0ef5ef55ed3d37854320d4926b04a4cb42a2e88f71da9ddfdacfde8e364f027",
+                "sha256:dd2e4ca6ce3785c8dd342d1853dd9052b19290d5bf66060846e5dc6b8d6667f7",
+                "sha256:fae4cffc040921b8a2d60c6cf0b5d662c1190fe54d718271db4eb17d44a185b7",
+                "sha256:feb2365914948b8620347784b6b6da356f31c9d03560259070b2f30cff3d469d"
             ],
             "index": "pypi",
             "version": "==2017.3"
@@ -427,9 +443,9 @@
         },
         "sqlalchemy": {
             "hashes": [
-                "sha256:8027fa183f5be466030617a497b2d64e0e16c8d615e5a34bdf9fab6f66bf4723"
+                "sha256:11ead7047ff3f394ed0d4b62aded6c5d970a9b718e1dc6add9f5e79442cc5b14"
             ],
-            "version": "==1.2.18"
+            "version": "==1.3.0"
         },
         "thrift": {
             "hashes": [
@@ -546,13 +562,17 @@
         "coverage": {
             "hashes": [
                 "sha256:007eeef7e23f9473622f7d94a3e029a45d55a92a1f083f0f3512f5ab9a669b05",
+                "sha256:0388c12539372bb92d6dde68b4627f0300d948965bbb7fc104924d715fdc0965",
                 "sha256:079248312838c4c8f3494934ab7382a42d42d5f365f0cf7516f938dbb3f53f3f",
                 "sha256:17307429935f96c986a1b1674f78079528833410750321d22b5fb35d1883828e",
+                "sha256:1afccd7e27cac1b9617be8c769f6d8a6d363699c9b86820f40c74cfb3328921c",
                 "sha256:2ad357d12971e77360034c1596011a03f50c0f9e1ecd12e081342b8d1aee2236",
+                "sha256:2b4d7f03a8a6632598cbc5df15bbca9f778c43db7cf1a838f4fa2c8599a8691a",
                 "sha256:2e1a5c6adebb93c3b175103c2f855eda957283c10cf937d791d81bef8872d6ca",
                 "sha256:309d91bd7a35063ec7a0e4d75645488bfab3f0b66373e7722f23da7f5b0f34cc",
                 "sha256:358d635b1fc22a425444d52f26287ae5aea9e96e254ff3c59c407426f44574f4",
                 "sha256:3f4d0b3403d3e110d2588c275540649b1841725f5a11a7162620224155d00ba2",
+                "sha256:43a155eb76025c61fc20c3d03b89ca28efa6f5be572ab6110b2fb68eda96bfea",
                 "sha256:493082f104b5ca920e97a485913de254cbe351900deed72d4264571c73464cd0",
                 "sha256:4c4f368ffe1c2e7602359c2c50233269f3abe1c48ca6b288dcd0fb1d1c679733",
                 "sha256:5ff16548492e8a12e65ff3d55857ccd818584ed587a6c2898a9ebbe09a880674",
@@ -563,8 +583,11 @@
                 "sha256:845fddf89dca1e94abe168760a38271abfc2e31863fbb4ada7f9a99337d7c3dc",
                 "sha256:87d942863fe74b1c3be83a045996addf1639218c2cb89c5da18c06c0fe3917ea",
                 "sha256:9721f1b7275d3112dc7ccf63f0553c769f09b5c25a26ee45872c7f5c09edf6c1",
+                "sha256:a4497faa4f1c0fc365ba05eaecfb6b5d24e3c8c72e95938f9524e29dadb15e76",
                 "sha256:a7cfaebd8f24c2b537fa6a271229b051cdac9c1734bb6f939ccfc7c055689baa",
+                "sha256:ab3508df9a92c1d3362343d235420d08e2662969b83134f8a97dc1451cbe5e84",
                 "sha256:b0059630ca5c6b297690a6bf57bf2fdac1395c24b7935fd73ee64190276b743b",
+                "sha256:b6cebae1502ce5b87d7c6f532fa90ab345cfbda62b95aeea4e431e164d498a3d",
                 "sha256:bd4800e32b4c8d99c3a2c943f1ac430cbf80658d884123d19639bcde90dad44a",
                 "sha256:cdd92dd9471e624cd1d8c1a2703d25f114b59b736b0f1f659a98414e535ffb3d",
                 "sha256:d00e29b78ff610d300b2c37049a41234d48ea4f2d2581759ebcf67caaf731c31",
@@ -576,7 +599,8 @@
                 "sha256:f29841e865590af72c4b90d7b5b8e93fd560f5dea436c1d5ee8053788f9285de",
                 "sha256:f3a5c6d054c531536a83521c00e5d4004f1e126e2e2556ce399bef4180fbe540",
                 "sha256:f87f522bde5540d8a4b11df80058281ac38c44b13ce29ced1e294963dd51a8f8",
-                "sha256:f8c55dd0f56d3d618dfacf129e010cbe5d5f94b6951c1b2f13ab1a2f79c284da"
+                "sha256:f8c55dd0f56d3d618dfacf129e010cbe5d5f94b6951c1b2f13ab1a2f79c284da",
+                "sha256:f98b461cb59f117887aa634a66022c0bd394278245ed51189f63a036516e32de"
             ],
             "index": "pypi",
             "version": "==4.4.2"
@@ -657,8 +681,15 @@
         },
         "pytz": {
             "hashes": [
+                "sha256:59707844a9825589878236ff2f4e0dc9958511b7ffaae94dc615da07d4a68d33",
+                "sha256:699d18a2a56f19ee5698ab1123bbcc1d269d061996aeb1eda6d89248d3542b82",
+                "sha256:80af0f3008046b9975242012a985f04c5df1f01eed4ec1633d56cc47a75a6a48",
+                "sha256:8cc90340159b5d7ced6f2ba77694d946fc975b09f1a51d93f3ce3bb399396f94",
                 "sha256:c41c62827ce9cafacd6f2f7018e4f83a6f1986e87bfd000b8cfbd4ab5da95f1a",
-                "sha256:fae4cffc040921b8a2d60c6cf0b5d662c1190fe54d718271db4eb17d44a185b7"
+                "sha256:d0ef5ef55ed3d37854320d4926b04a4cb42a2e88f71da9ddfdacfde8e364f027",
+                "sha256:dd2e4ca6ce3785c8dd342d1853dd9052b19290d5bf66060846e5dc6b8d6667f7",
+                "sha256:fae4cffc040921b8a2d60c6cf0b5d662c1190fe54d718271db4eb17d44a185b7",
+                "sha256:feb2365914948b8620347784b6b6da356f31c9d03560259070b2f30cff3d469d"
             ],
             "index": "pypi",
             "version": "==2017.3"

From e8a510894087e828e90b26e4e8112640f9b6724b Mon Sep 17 00:00:00 2001
From: Martin Lessmeister <mhl10@cornell.edu>
Date: Thu, 7 Mar 2019 09:53:09 -0500
Subject: [PATCH 73/77] mypy fix

---
 search/routes/ui.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/search/routes/ui.py b/search/routes/ui.py
index efe8341d..0d89d8c2 100644
--- a/search/routes/ui.py
+++ b/search/routes/ui.py
@@ -76,7 +76,7 @@ def search(archives: Optional[List[str]] = None) -> Union[str, Response]:
     logger.debug(f"controller returned code: {code}")
     if code == status.HTTP_200_OK:
         return render_template("search/search.html", pagetitle="Search",
-                               archives=archives, **response)
+                               archives=archives, **response)  # type: ignore
     elif (code == status.HTTP_301_MOVED_PERMANENTLY
           or code == status.HTTP_303_SEE_OTHER):
         return redirect(headers['Location'], code=code)

From 674a2f18b518e07786b8482c44e3741e61a3fe4f Mon Sep 17 00:00:00 2001
From: Martin Lessmeister <mhl10@cornell.edu>
Date: Thu, 7 Mar 2019 11:42:03 -0500
Subject: [PATCH 74/77] update arxiv-base to 0.15.2

---
 Pipfile      | 2 +-
 Pipfile.lock | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/Pipfile b/Pipfile
index 07378ed1..7cc355ec 100644
--- a/Pipfile
+++ b/Pipfile
@@ -5,7 +5,7 @@ name = "pypi"
 
 [packages]
 arxiv-auth = "==0.2.7"
-arxiv-base = "==0.15.1"
+arxiv-base = "==0.15.2"
 boto = "==2.48.0"
 "boto3" = "==1.6.6"
 botocore = "==1.9.6"
diff --git a/Pipfile.lock b/Pipfile.lock
index 5a12f1c4..fd6b72b4 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "f774d00518c8a0b478a50244f924078d9a8e11340a7ce4eaf13439f0be81c72d"
+            "sha256": "07347129934933210e608a081f620334989fb6d9d98017c88152ac8a88320009"
         },
         "pipfile-spec": 6,
         "requires": {},
@@ -23,10 +23,10 @@
         },
         "arxiv-base": {
             "hashes": [
-                "sha256:9f068f24567fb7ebebcc8fe05b98589c045ec7926441183309f468212d94aa2a"
+                "sha256:a5c159a937d2d4304440e661ea14ff81bf32950e19765a14bc9efcb7750bbfb3"
             ],
             "index": "pypi",
-            "version": "==0.15.1"
+            "version": "==0.15.2"
         },
         "bleach": {
             "hashes": [

From fad3229729eb4112f1556217a025dc2b0d6d7a05 Mon Sep 17 00:00:00 2001
From: Martin Lessmeister <mhl10@cornell.edu>
Date: Thu, 7 Mar 2019 12:00:11 -0500
Subject: [PATCH 75/77] lintstats with more debug

---
 lintstats.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lintstats.sh b/lintstats.sh
index 06d9e4e7..81a6379e 100755
--- a/lintstats.sh
+++ b/lintstats.sh
@@ -14,7 +14,7 @@ curl -u $USERNAME:$GITHUB_TOKEN \
 
 
 # Check mypy integration
-MYPY_STATUS=$( pipenv run mypy -p search | grep -v "test.*" | grep -v "defined here" | wc -l | tr -d '[:space:]' )
+MYPY_STATUS=$( pipenv run mypy -p search | grep -v "test.*" | grep -v "defined here" | tee /dev/tty | wc -l | tr -d '[:space:]' )
 if [ $MYPY_STATUS -ne 0 ]; then MYPY_STATE="failure" && echo "mypy failed"; else MYPY_STATE="success" &&  echo "mypy passed"; fi
 
 curl -u $USERNAME:$GITHUB_TOKEN \

From 3651f0596f3a962ef10dd4195da6008488576168 Mon Sep 17 00:00:00 2001
From: Jaimie Murdock <jmm698@cornell.edu>
Date: Thu, 7 Mar 2019 09:30:53 -0500
Subject: [PATCH 76/77] Correcting mypy errors with an upgrade and some type:
 ignore

---
 Pipfile                                 |  3 +-
 Pipfile.lock                            | 62 +++++++++++++------------
 search/context.py                       |  2 +-
 search/controllers/advanced/__init__.py |  4 +-
 search/controllers/api/__init__.py      |  9 ++--
 search/factory.py                       |  5 +-
 6 files changed, 42 insertions(+), 43 deletions(-)

diff --git a/Pipfile b/Pipfile
index 7cc355ec..3059c264 100644
--- a/Pipfile
+++ b/Pipfile
@@ -28,7 +28,7 @@ jsonschema = "==2.6.0"
 markupsafe = "==1.0"
 mccabe = "==0.6.1"
 mock = "==2.0.0"
-mypy = "==0.560"
+mypy = "==0.670"
 "nose2" = "==0.7.3"
 pbr = "==3.1.1"
 psutil = "==5.4.3"
@@ -43,7 +43,6 @@ requests = "==2.20.0"
 snowballstemmer = "==1.2.1"
 thrift = "==0.11.0"
 thrift-connector = "==0.23"
-typed-ast = "==1.1.0"
 "urllib3" = "==1.22"
 werkzeug = "==0.14.1"
 wtforms = "==2.1"
diff --git a/Pipfile.lock b/Pipfile.lock
index 49f1acb7..651fadc2 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "07347129934933210e608a081f620334989fb6d9d98017c88152ac8a88320009"
+            "sha256": "6072c8a0a2ff9f76ff7f46f802420339d62e91090f1aa8740d7a5cb4b65ac9b4"
         },
         "pipfile-spec": 6,
         "requires": {},
@@ -274,11 +274,18 @@
         },
         "mypy": {
             "hashes": [
-                "sha256:aa668809ae0dbec5e9feb8929f4b5e1f9318a0a397447fa2f38c382a2ed6a036",
-                "sha256:bd0c9a2fcf0c4f7a54a2b625f466fcc000d415f371298d96fa5d2acc69074aca"
+                "sha256:308c274eb8482fbf16006f549137ddc0d69e5a589465e37b99c4564414363ca7",
+                "sha256:e80fd6af34614a0e898a57f14296d0dacb584648f0339c2e000ddbf0f4cc2f8d"
             ],
             "index": "pypi",
-            "version": "==0.560"
+            "version": "==0.670"
+        },
+        "mypy-extensions": {
+            "hashes": [
+                "sha256:37e0e956f41369209a3d5f34580150bcacfabaa57b33a15c0b25f4b5725e0812",
+                "sha256:b16cabe759f55e3409a7d231ebd2841378fb0c27a5d1994719e340e4f429ac3e"
+            ],
+            "version": "==0.4.1"
         },
         "mysqlclient": {
             "hashes": [
@@ -448,32 +455,27 @@
         },
         "typed-ast": {
             "hashes": [
-                "sha256:0948004fa228ae071054f5208840a1e88747a357ec1101c17217bfe99b299d58",
-                "sha256:10703d3cec8dcd9eef5a630a04056bbc898abc19bac5691612acba7d1325b66d",
-                "sha256:1f6c4bd0bdc0f14246fd41262df7dfc018d65bb05f6e16390b7ea26ca454a291",
-                "sha256:25d8feefe27eb0303b73545416b13d108c6067b846b543738a25ff304824ed9a",
-                "sha256:29464a177d56e4e055b5f7b629935af7f49c196be47528cc94e0a7bf83fbc2b9",
-                "sha256:2e214b72168ea0275efd6c884b114ab42e316de3ffa125b267e732ed2abda892",
-                "sha256:3e0d5e48e3a23e9a4d1a9f698e32a542a4a288c871d33ed8df1b092a40f3a0f9",
-                "sha256:519425deca5c2b2bdac49f77b2c5625781abbaf9a809d727d3a5596b30bb4ded",
-                "sha256:57fe287f0cdd9ceaf69e7b71a2e94a24b5d268b35df251a88fef5cc241bf73aa",
-                "sha256:668d0cec391d9aed1c6a388b0d5b97cd22e6073eaa5fbaa6d2946603b4871efe",
-                "sha256:68ba70684990f59497680ff90d18e756a47bf4863c604098f10de9716b2c0bdd",
-                "sha256:6de012d2b166fe7a4cdf505eee3aaa12192f7ba365beeefaca4ec10e31241a85",
-                "sha256:79b91ebe5a28d349b6d0d323023350133e927b4de5b651a8aa2db69c761420c6",
-                "sha256:8550177fa5d4c1f09b5e5f524411c44633c80ec69b24e0e98906dd761941ca46",
-                "sha256:898f818399cafcdb93cbbe15fc83a33d05f18e29fb498ddc09b0214cdfc7cd51",
-                "sha256:94b091dc0f19291adcb279a108f5d38de2430411068b219f41b343c03b28fb1f",
-                "sha256:a26863198902cda15ab4503991e8cf1ca874219e0118cbf07c126bce7c4db129",
-                "sha256:a8034021801bc0440f2e027c354b4eafd95891b573e12ff0418dec385c76785c",
-                "sha256:bc978ac17468fe868ee589c795d06777f75496b1ed576d308002c8a5756fb9ea",
-                "sha256:c05b41bc1deade9f90ddc5d988fe506208019ebba9f2578c622516fd201f5863",
-                "sha256:c9b060bd1e5a26ab6e8267fd46fc9e02b54eb15fffb16d112d4c7b1c12987559",
-                "sha256:edb04bdd45bfd76c8292c4d9654568efaedf76fe78eb246dde69bdb13b2dad87",
-                "sha256:f19f2a4f547505fe9072e15f6f4ae714af51b5a681a97f187971f50c283193b6"
-            ],
-            "index": "pypi",
-            "version": "==1.1.0"
+                "sha256:035a54ede6ce1380599b2ce57844c6554666522e376bd111eb940fbc7c3dad23",
+                "sha256:037c35f2741ce3a9ac0d55abfcd119133cbd821fffa4461397718287092d9d15",
+                "sha256:049feae7e9f180b64efacbdc36b3af64a00393a47be22fa9cb6794e68d4e73d3",
+                "sha256:19228f7940beafc1ba21a6e8e070e0b0bfd1457902a3a81709762b8b9039b88d",
+                "sha256:2ea681e91e3550a30c2265d2916f40a5f5d89b59469a20f3bad7d07adee0f7a6",
+                "sha256:3a6b0a78af298d82323660df5497bcea0f0a4a25a0b003afd0ce5af049bd1f60",
+                "sha256:5385da8f3b801014504df0852bf83524599df890387a3c2b17b7caa3d78b1773",
+                "sha256:606d8afa07eef77280c2bf84335e24390055b478392e1975f96286d99d0cb424",
+                "sha256:69245b5b23bbf7fb242c9f8f08493e9ecd7711f063259aefffaeb90595d62287",
+                "sha256:6f6d839ab09830d59b7fa8fb6917023d8cb5498ee1f1dbd82d37db78eb76bc99",
+                "sha256:730888475f5ac0e37c1de4bd05eeb799fdb742697867f524dc8a4cd74bcecc23",
+                "sha256:9819b5162ffc121b9e334923c685b0d0826154e41dfe70b2ede2ce29034c71d8",
+                "sha256:9e60ef9426efab601dd9aa120e4ff560f4461cf8442e9c0a2b92548d52800699",
+                "sha256:af5fbdde0690c7da68e841d7fc2632345d570768ea7406a9434446d7b33b0ee1",
+                "sha256:b64efdbdf3bbb1377562c179f167f3bf301251411eb5ac77dec6b7d32bcda463",
+                "sha256:bac5f444c118aeb456fac1b0b5d14c6a71ea2a42069b09c176f75e9bd4c186f6",
+                "sha256:bda9068aafb73859491e13b99b682bd299c1b5fd50644d697533775828a28ee0",
+                "sha256:d659517ca116e6750101a1326107d3479028c5191f0ecee3c7203c50f5b915b0",
+                "sha256:eddd3fb1f3e0f82e5915a899285a39ee34ce18fd25d89582bc89fc9fb16cd2c6"
+            ],
+            "version": "==1.3.1"
         },
         "urllib3": {
             "hashes": [
diff --git a/search/context.py b/search/context.py
index 67fd2f38..cf7b5169 100644
--- a/search/context.py
+++ b/search/context.py
@@ -7,7 +7,7 @@
 import werkzeug
 
 
-def get_application_config(app: Flask = None) -> Union[dict, os._Environ]:
+def get_application_config(app: Optional[Union[Flask, object]] = None) -> Union[dict, os._Environ]:
     """
     Get a configuration from the current app, or fall back to env.
 
diff --git a/search/controllers/advanced/__init__.py b/search/controllers/advanced/__init__.py
index dbca1b23..f3425874 100644
--- a/search/controllers/advanced/__init__.py
+++ b/search/controllers/advanced/__init__.py
@@ -79,8 +79,8 @@ def search(request_params: MultiDict) -> Response:
     has_classic = False
     for key in request_params.keys():
         if key.startswith('terms-') and key.endswith('-term'):
-            value = request_params.get(key)
-            i = re.search('terms-([0-9])+-term', key).group(1)
+            value: str = request_params.get(key) # type: ignore
+            i = re.search('terms-([0-9])+-term', key).group(1) # type: ignore
             field = request_params.get(f'terms-{i}-field')
             # We are only looking for this syntax in the author search, or
             # in an all-fields search.
diff --git a/search/controllers/api/__init__.py b/search/controllers/api/__init__.py
index d689f4e7..7a15995f 100644
--- a/search/controllers/api/__init__.py
+++ b/search/controllers/api/__init__.py
@@ -151,11 +151,11 @@ def _get_date_params(params: MultiDict, query_terms: List) \
                 dt = pytz.utc.localize(dt)
             dt = dt.replace(tzinfo=EASTERN)
         except ValueError:
-            raise BadRequest({'field': field, 'reason': 'invalid datetime'})
+            raise BadRequest(f'Invalid datetime in {field}')
         date_params[field] = dt
         query_terms.append({'parameter': field, 'value': dt})
     if 'date_type' in params:
-        date_params['date_type'] = params.get('date_type')
+        date_params['date_type'] = params.get('date_type') # type: ignore
         query_terms.append({'parameter': 'date_type',
                             'value': date_params['date_type']})
     if date_params:
@@ -192,9 +192,6 @@ def _get_classification(value: str, field: str, query_terms: List) \
     try:
         clsns = _to_classification(value, query_terms)
     except ValueError:
-        raise BadRequest({
-            'field': field,
-            'reason': 'not a valid classification term'
-        })
+        raise BadRequest(f'Not a valid classification term: {field}={value}')
     query_terms.append({'parameter': field, 'value': value})
     return clsns
diff --git a/search/factory.py b/search/factory.py
index da1a6231..4d9c84c4 100644
--- a/search/factory.py
+++ b/search/factory.py
@@ -24,7 +24,7 @@ def create_ui_web_app() -> Flask:
     logging.getLogger('botocore').setLevel(logging.ERROR)
 
     app = Flask('search')
-    app.config.from_pyfile('config.py')
+    app.config.from_pyfile('config.py') # type: ignore
     app.url_map.converters['archive'] = ArchiveConverter
 
     index.init_app(app)
@@ -47,7 +47,7 @@ def create_api_web_app() -> Flask:
 
     app = Flask('search')
     app.json_encoder = ISO8601JSONEncoder
-    app.config.from_pyfile('config.py')
+    app.config.from_pyfile('config.py') # type: ignore
 
     index.init_app(app)
 
@@ -62,3 +62,4 @@ def create_api_web_app() -> Flask:
             app.errorhandler(error)(handler)
 
     return app
+

From 355b23ef99c360941b34b5972a1087f63e45ce7b Mon Sep 17 00:00:00 2001
From: Jaimie Murdock <jmm698@cornell.edu>
Date: Thu, 7 Mar 2019 13:23:59 -0500
Subject: [PATCH 77/77] mypy ignores for a variety of errors

---
 search/routes/api/__init__.py   |  4 ++--
 search/routes/api/exceptions.py | 14 +++++++-------
 search/routes/ui.py             | 18 +++++++++---------
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/search/routes/api/__init__.py b/search/routes/api/__init__.py
index a2dc6fb5..581b9d04 100644
--- a/search/routes/api/__init__.py
+++ b/search/routes/api/__init__.py
@@ -39,7 +39,7 @@ def search() -> Response:
     # if requested == ATOM_XML:
     #     return serialize.as_atom(data), status, headers
     response_data = serialize.as_json(data['results'], query=data['query'])
-    return response_data, status_code, headers
+    return response_data, status_code, headers # type: ignore
 
 
 @blueprint.route('<arxiv:paper_id>v<string:version>', methods=['GET'])
@@ -47,4 +47,4 @@ def search() -> Response:
 def paper(paper_id: str, version: str) -> Response:
     """Document metadata endpoint."""
     data, status_code, headers = api.paper(f'{paper_id}v{version}')
-    return serialize.as_json(data['results']), status_code, headers
+    return serialize.as_json(data['results']), status_code, headers # type: ignore
diff --git a/search/routes/api/exceptions.py b/search/routes/api/exceptions.py
index bdf9db84..e3cf397a 100644
--- a/search/routes/api/exceptions.py
+++ b/search/routes/api/exceptions.py
@@ -45,7 +45,7 @@ def get_handlers() -> List[Tuple[type, Callable]]:
 def handle_not_found(error: NotFound) -> Response:
     """Render the base 404 error page."""
     rendered = jsonify({'code': error.code, 'error': error.description})
-    response = make_response(rendered)
+    response: Response = make_response(rendered)
     response.status_code = status.HTTP_404_NOT_FOUND
     return response
 
@@ -54,7 +54,7 @@ def handle_not_found(error: NotFound) -> Response:
 def handle_forbidden(error: Forbidden) -> Response:
     """Render the base 403 error page."""
     rendered = jsonify({'code': error.code, 'error': error.description})
-    response = make_response(rendered)
+    response: Response = make_response(rendered)
     response.status_code = status.HTTP_403_FORBIDDEN
     return response
 
@@ -63,7 +63,7 @@ def handle_forbidden(error: Forbidden) -> Response:
 def handle_unauthorized(error: Unauthorized) -> Response:
     """Render the base 401 error page."""
     rendered = jsonify({'code': error.code, 'error': error.description})
-    response = make_response(rendered)
+    response: Response = make_response(rendered)
     response.status_code = status.HTTP_401_UNAUTHORIZED
     return response
 
@@ -72,7 +72,7 @@ def handle_unauthorized(error: Unauthorized) -> Response:
 def handle_method_not_allowed(error: MethodNotAllowed) -> Response:
     """Render the base 405 error page."""
     rendered = jsonify({'code': error.code, 'error': error.description})
-    response = make_response(rendered)
+    response: Response = make_response(rendered)
     response.status_code = status.HTTP_405_METHOD_NOT_ALLOWED
     return response
 
@@ -81,7 +81,7 @@ def handle_method_not_allowed(error: MethodNotAllowed) -> Response:
 def handle_request_entity_too_large(error: RequestEntityTooLarge) -> Response:
     """Render the base 413 error page."""
     rendered = jsonify({'code': error.code, 'error': error.description})
-    response = make_response(rendered)
+    response: Response = make_response(rendered)
     response.status_code = status.HTTP_413_REQUEST_ENTITY_TOO_LARGE
     return response
 
@@ -90,7 +90,7 @@ def handle_request_entity_too_large(error: RequestEntityTooLarge) -> Response:
 def handle_bad_request(error: BadRequest) -> Response:
     """Render the base 400 error page."""
     rendered = jsonify({'code': error.code, 'error': error.description})
-    response = make_response(rendered)
+    response: Response = make_response(rendered)
     response.status_code = status.HTTP_400_BAD_REQUEST
     return response
 
@@ -104,6 +104,6 @@ def handle_internal_server_error(error: InternalServerError) -> Response:
         logger.error('Caught unhandled exception: %s', error)
         rendered = jsonify({'code': status.HTTP_500_INTERNAL_SERVER_ERROR,
                             'error': 'Unexpected error'})
-    response = make_response(rendered)
+    response: Response = make_response(rendered)
     response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
     return response
diff --git a/search/routes/ui.py b/search/routes/ui.py
index 0d89d8c2..9e38d712 100644
--- a/search/routes/ui.py
+++ b/search/routes/ui.py
@@ -40,7 +40,7 @@ def get_parameters_from_cookie() -> None:
         return
 
     # We need the request args to be mutable.
-    request.args = MultiDict(request.args.items(multi=True))
+    request.args = MultiDict(request.args.items(multi=True)) # type: ignore
     data = json.loads(request.cookies[PARAMS_COOKIE_NAME])
     for param in PARAMS_TO_PERSIST:
         # Don't clobber the user's explicit request.
@@ -75,11 +75,11 @@ def search(archives: Optional[List[str]] = None) -> Union[str, Response]:
     response, code, headers = simple.search(request.args, archives)
     logger.debug(f"controller returned code: {code}")
     if code == status.HTTP_200_OK:
-        return render_template("search/search.html", pagetitle="Search",
-                               archives=archives, **response)  # type: ignore
+        return render_template("search/search.html", pagetitle="Search", # type: ignore
+                               archives=archives, **response)
     elif (code == status.HTTP_301_MOVED_PERMANENTLY
           or code == status.HTTP_303_SEE_OTHER):
-        return redirect(headers['Location'], code=code)
+        return redirect(headers['Location'], code=code) # type: ignore
     raise InternalServerError('Unexpected error')
 
 
@@ -87,7 +87,7 @@ def search(archives: Optional[List[str]] = None) -> Union[str, Response]:
 def advanced_search() -> Union[str, Response]:
     """Advanced search interface."""
     response, code, headers = advanced.search(request.args)
-    return render_template(
+    return render_template( # type: ignore
         "search/advanced_search.html",
         pagetitle="Advanced Search",
         **response
@@ -103,7 +103,7 @@ def group_search(groups_or_archives: str) -> Union[str, Response]:
     interface. Anything else will result in a 404.
     """
     response, code, _ = advanced.group_search(request.args, groups_or_archives)
-    return render_template(
+    return render_template( # type: ignore
         "search/advanced_search.html",
         pagetitle="Advanced Search",
         **response
@@ -117,7 +117,7 @@ def service_status() -> Union[str, Response]:
 
     Exercises the search index connection with a real query.
     """
-    return health_check()
+    return health_check() # type: ignore
 
 
 def _browse_url(name: str, **parameters: Any) -> Optional[str]:
@@ -154,7 +154,7 @@ def url_for_page_builder() -> Dict[str, Callable]:
     def url_for_page(page: int, size: int) -> str:
         """Build an URL to for a search result page."""
         rule = request.url_rule
-        parts = url_parse(url_for(rule.endpoint))
+        parts = url_parse(url_for(rule.endpoint)) # type: ignore
         args = request.args.copy()
         args['start'] = (page - 1) * size
         parts = parts.replace(query=url_encode(args))
@@ -179,7 +179,7 @@ def current_url_sans_parameters_builder() -> Dict[str, Callable]:
     def current_url_sans_parameters(*params_to_remove: str) -> str:
         """Get the current URL with ``param`` removed from GET parameters."""
         rule = request.url_rule
-        parts = url_parse(url_for(rule.endpoint))
+        parts = url_parse(url_for(rule.endpoint)) # type: ignore
         args = request.args.copy()
         for param in params_to_remove:
             args.pop(param, None)