Skip to content

Commit

Permalink
loaders: webhdfs loader: support optional '&user.name=<name>' param f…
Browse files Browse the repository at this point in the history
…rom WEBHDFS_USER env var or '&delegation=<token>' from WEBHDFS_TOKEN env var (fixes ukwa/ukwa-pywb#5)
  • Loading branch information
ikreymer authored and N0taN3rd committed Sep 3, 2019
1 parent ec88e96 commit 959481f
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 19 deletions.
25 changes: 16 additions & 9 deletions pywb/utils/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,23 +404,30 @@ def s3_load(anon=False):

# =================================================================
class WebHDFSLoader(HttpLoader):
    """Load byte ranges from HDFS through the WebHDFS REST API.

    A ``webhdfs://host[:port]/path`` url is rewritten to an
    ``http://host[:port]/webhdfs/v1/path?op=OPEN&...`` request, which is
    then fetched by the parent ``HttpLoader``.

    Optional authentication is taken from the environment:

    * ``WEBHDFS_USER``  -> sent as the ``user.name`` query param
    * ``WEBHDFS_TOKEN`` -> sent as the ``delegation`` query param

    Unset or empty variables are ignored.
    """

    # Base url; the query string is built separately with urlencode().
    HTTP_URL = 'http://{host}/webhdfs/v1{path}?'

    def load(self, url, offset, length):
        """Open ``url`` at ``offset`` and read ``length`` bytes.

        :param url: ``webhdfs://`` url; only its netloc and path are used
        :param offset: byte offset, sent as the ``offset`` query param
        :param length: number of bytes to read; the ``length`` query param
            is only sent when this is greater than zero
        :return: whatever ``HttpLoader.load`` returns for the rewritten url
        """
        parts = urlsplit(url)

        http_url = self.HTTP_URL.format(host=parts.netloc,
                                        path=parts.path)

        params = {'op': 'OPEN',
                  'offset': str(offset)}

        if length > 0:
            params['length'] = str(length)

        user = os.environ.get('WEBHDFS_USER')
        if user:
            params['user.name'] = user

        token = os.environ.get('WEBHDFS_TOKEN')
        if token:
            params['delegation'] = token

        # urlencode() also escapes user/token values that are not url-safe
        http_url += urlencode(params)

        # The byte range is already encoded in the query string, so the
        # parent loader is asked for the full response (offset 0, length -1).
        return super(WebHDFSLoader, self).load(http_url, 0, -1)


# =================================================================
Expand Down
38 changes: 28 additions & 10 deletions pywb/utils/test/test_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@
from pywb.utils.loaders import extract_client_cookie
from pywb.utils.loaders import read_last_line

from pywb.utils.canonicalize import canonicalize

from mock import patch

from warcio.bufferedreaders import DecompressingBufferedReader
Expand Down Expand Up @@ -119,20 +121,36 @@ def test_s3_read_2():
reader = DecompressingBufferedReader(BytesIO(buff))
assert reader.readline() == b'<!DOCTYPE html>\n'

def mock_load(expected):
    """Return a stand-in for ``HttpLoader.load`` that checks its arguments.

    The urls are compared after ``canonicalize()`` so the test does not
    depend on the order of the query params. The replacement also asserts
    that it is called with offset 0 and length -1.
    """
    def mock(self, url, offset, length):
        assert canonicalize(url) == canonicalize(expected)
        assert offset == 0
        assert length == -1
        return None

    return mock


def test_mock_webhdfs_load_1():
    # host with port, explicit length
    expected = 'http://remote-host:1234/webhdfs/v1/some/file.warc.gz?op=OPEN&offset=10&length=50'
    with patch('pywb.utils.loaders.HttpLoader.load', mock_load(expected)):
        BlockLoader().load('webhdfs://remote-host:1234/some/file.warc.gz', 10, 50)


def test_mock_webhdfs_load_2():
    # length of -1: no 'length' param expected in the url
    expected = 'http://remote-host/webhdfs/v1/some/file.warc.gz?op=OPEN&offset=10'
    with patch('pywb.utils.loaders.HttpLoader.load', mock_load(expected)):
        BlockLoader().load('webhdfs://remote-host/some/file.warc.gz', 10, -1)

def test_mock_webhdfs_load_3_username():
    # WEBHDFS_USER should be sent as the 'user.name' query param.
    # patch.dict restores os.environ on exit, so the variable does not
    # leak into tests that run afterwards.
    expected = 'http://remote-host/webhdfs/v1/some/file.warc.gz?op=OPEN&offset=10&user.name=someuser'
    with patch.dict(os.environ, {'WEBHDFS_USER': 'someuser'}):
        with patch('pywb.utils.loaders.HttpLoader.load', mock_load(expected)):
            BlockLoader().load('webhdfs://remote-host/some/file.warc.gz', 10, -1)

def test_mock_webhdfs_load_4_token():
    # WEBHDFS_TOKEN should be sent as the 'delegation' query param.
    # WEBHDFS_USER is forced to an empty string so that only the token
    # shows up in the url; patch.dict restores both variables on exit.
    env = {'WEBHDFS_USER': '', 'WEBHDFS_TOKEN': 'ATOKEN'}
    expected = 'http://remote-host/webhdfs/v1/some/file.warc.gz?op=OPEN&offset=10&delegation=ATOKEN'
    with patch.dict(os.environ, env):
        with patch('pywb.utils.loaders.HttpLoader.load', mock_load(expected)):
            BlockLoader().load('webhdfs://remote-host/some/file.warc.gz', 10, -1)


Expand Down

0 comments on commit 959481f

Please sign in to comment.