Skip to content

Commit

Permalink
RemoteSourceIndex: include method and requestBody params in CDX queries
Browse files Browse the repository at this point in the history
This enables CDX servers (such as OutbackCDX) to select the appropriate records for POST and PUT requests. All the server implementations I'm aware of ignore unknown query params, so we should be safe to include the new params and let the server opt in to handling them.
  • Loading branch information
ato committed Jun 12, 2023
1 parent 83b2113 commit 2bb97fc
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 4 deletions.
2 changes: 1 addition & 1 deletion pywb/warcserver/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def _load_index_source(self, params):

input_req = params.get('_input_req')
if input_req:
params['alt_url'] = input_req.include_method_query(url)
params.update(input_req.include_method_query(url))

cdx_iter = self.fuzzy(self.index_source, params)

Expand Down
6 changes: 6 additions & 0 deletions pywb/warcserver/index/indexsource.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,12 @@ def _get_api_url(self, params):
if 'matchType' in params:
api_url += '&matchType=' + params.get('matchType')

if params.get('method'):
api_url += '&method=' + quote_plus(params['method'])

if params.get('requestBody'):
api_url += '&requestBody=' + quote_plus(params['requestBody'])

return api_url

def load_index(self, params):
Expand Down
8 changes: 8 additions & 0 deletions pywb/warcserver/index/test/test_indexsource.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,3 +204,11 @@ def test_ait_filters(self):

assert(all([x.startswith(prefix) for x in filenames]))


# ============================================================================
def test_remote_api_url_encoding():
    """Verify the CDX API URL that ``RemoteIndexSource._get_api_url`` builds.

    Two lookups are checked against a fixed expected URL: one with only
    ``url``, and one that also carries ``method`` and ``requestBody``.
    """
    remote = RemoteIndexSource.init_from_string('cdx+http://cdxserver.example/cdx')

    # Lookup with only a url: the target url is expected percent-encoded.
    plain_params = {'url': 'http://iana.org/?query'}
    expected_plain = "http://cdxserver.example/cdx?url=http%3A//iana.org/%3Fquery&closest=&sort=closest"
    assert remote._get_api_url(plain_params) == expected_plain

    # Lookup with method and requestBody: both are expected as extra query
    # parameters, with '=' and '&' in the body escaped so they cannot be
    # confused with the CDX query's own separators.
    post_params = {
        'url': 'http://iana.org/?query',
        'method': 'POST',
        'requestBody': 'body=1&two=2',
    }
    expected_post = "http://cdxserver.example/cdx?url=http%3A//iana.org/%3Fquery&closest=&sort=closest&method=POST&requestBody=body%3D1%26two%3D2"
    assert remote._get_api_url(post_params) == expected_post
6 changes: 3 additions & 3 deletions pywb/warcserver/inputrequest.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,12 @@ def _get_header(self, name):

def include_method_query(self, url):
if not url:
return url
return {'alt_url': url}

method = self.get_req_method()

if method == 'GET' or method == 'HEAD':
return url
return {'alt_url': url}

mime = self._get_content_type()
length = self._get_content_length()
Expand All @@ -96,7 +96,7 @@ def include_method_query(self, url):
if new_url != url:
self.env['wsgi.input'] = buffered_stream

return new_url
return {'alt_url': new_url, 'method': method, 'requestBody': query.query}

def get_full_request_uri(self):
req_uri = self.env.get('REQUEST_URI')
Expand Down

0 comments on commit 2bb97fc

Please sign in to comment.