Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add timeout support for http/https requests #594

Merged
merged 4 commits into from
Mar 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ See [the migration docs](MIGRATING_FROM_OLDER_VERSIONS.rst) for details.

- Remove `tests` directory from package (PR [#589](https://github.com/RaRe-Technologies/smart_open/pull/589), [@e-nalepa](https://github.com/e-nalepa))
- Refactor S3, replace high-level resource/session API with low-level client API (PR [#583](https://github.com/RaRe-Technologies/smart_open/pull/583), [@mpenkov](https://github.com/mpenkov))
- Add timeout parameter for http/https (PR [#594](https://github.com/RaRe-Technologies/smart_open/pull/594), [@dustymugs](https://github.com/dustymugs))

# 4.2.0, 15 Feb 2021

Expand Down
28 changes: 22 additions & 6 deletions smart_open/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def open_uri(uri, mode, transport_params):
return open(uri, mode, **kwargs)


def open(uri, mode, kerberos=False, user=None, password=None, headers=None):
def open(uri, mode, kerberos=False, user=None, password=None, headers=None, timeout=None):
"""Implement streamed reader from a web site.

Supports Kerberos and Basic HTTP authentication.
Expand Down Expand Up @@ -80,7 +80,7 @@ def open(uri, mode, kerberos=False, user=None, password=None, headers=None):
if mode == constants.READ_BINARY:
fobj = SeekableBufferedInputBase(
uri, mode, kerberos=kerberos,
user=user, password=password, headers=headers
user=user, password=password, headers=headers, timeout=timeout,
)
fobj.name = os.path.basename(urllib.parse.urlparse(uri).path)
return fobj
Expand All @@ -90,7 +90,7 @@ def open(uri, mode, kerberos=False, user=None, password=None, headers=None):

class BufferedInputBase(io.BufferedIOBase):
def __init__(self, url, mode='r', buffer_size=DEFAULT_BUFFER_SIZE,
kerberos=False, user=None, password=None, headers=None):
kerberos=False, user=None, password=None, headers=None, timeout=None):
if kerberos:
import requests_kerberos
auth = requests_kerberos.HTTPKerberosAuth()
Expand All @@ -107,7 +107,15 @@ def __init__(self, url, mode='r', buffer_size=DEFAULT_BUFFER_SIZE,
else:
self.headers = headers

self.response = requests.get(url, auth=auth, stream=True, headers=self.headers)
self.timeout = timeout

self.response = requests.get(
url,
auth=auth,
stream=True,
headers=self.headers,
timeout=self.timeout,
)

if not self.response.ok:
self.response.raise_for_status()
Expand Down Expand Up @@ -200,7 +208,7 @@ class SeekableBufferedInputBase(BufferedInputBase):
"""

def __init__(self, url, mode='r', buffer_size=DEFAULT_BUFFER_SIZE,
kerberos=False, user=None, password=None, headers=None):
kerberos=False, user=None, password=None, headers=None, timeout=None):
"""
If Kerberos is True, will attempt to use the local Kerberos credentials.
Otherwise, will try to use "basic" HTTP authentication via username/password.
Expand All @@ -222,6 +230,8 @@ def __init__(self, url, mode='r', buffer_size=DEFAULT_BUFFER_SIZE,
else:
self.headers = headers

self.timeout = timeout

self.buffer_size = buffer_size
self.mode = mode
self.response = self._partial_request()
Expand Down Expand Up @@ -307,5 +317,11 @@ def _partial_request(self, start_pos=None):
if start_pos is not None:
self.headers.update({"range": smart_open.utils.make_range_string(start_pos)})

response = requests.get(self.url, auth=self.auth, stream=True, headers=self.headers)
response = requests.get(
self.url,
auth=self.auth,
stream=True,
headers=self.headers,
timeout=self.timeout,
)
return response
8 changes: 8 additions & 0 deletions smart_open/tests/test_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,11 @@ def test_https_seek_reverse(self):
fin.seek(-10, whence=smart_open.constants.WHENCE_CURRENT)
read_bytes_2 = fin.read(size=10)
self.assertEqual(read_bytes_1, read_bytes_2)

@responses.activate
def test_timeout_attribute(self):
    """Check that a ``timeout`` transport param is stored on the returned reader."""
    timeout = 1
    responses.add_callback(responses.GET, URL, callback=request_callback)
    reader = smart_open.open(URL, "rb", transport_params={'timeout': timeout})
    # Use unittest assertions rather than bare ``assert``: they are not
    # stripped under ``python -O`` and match the ``self.assertEqual`` style
    # used by the neighbouring tests.
    self.assertTrue(hasattr(reader, 'timeout'))
    self.assertEqual(reader.timeout, timeout)