Skip to content

Commit

Permalink
allow specifying a minimum/maximum for 'sleep-*' options (#1835)
Browse files Browse the repository at this point in the history
for example '"sleep-request": [5.0, 10.0]' to wait between 5 and 10
seconds between each HTTP request
  • Loading branch information
mikf committed Sep 14, 2021
1 parent bd84530 commit c9e6693
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 18 deletions.
30 changes: 25 additions & 5 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ Description
extractor.*.sleep
-----------------
Type
``float``
|Duration|_
Default
``0``
Description
Expand All @@ -324,7 +324,7 @@ Description
extractor.*.sleep-extractor
---------------------------
Type
``float``
|Duration|_
Default
``0``
Description
Expand All @@ -335,7 +335,7 @@ Description
extractor.*.sleep-request
-------------------------
Type
``float``
|Duration|_
Default
``0``
Description
Expand Down Expand Up @@ -3167,7 +3167,8 @@ Custom Types
Date
----
Type
``string`` or ``integer``
* ``string``
* ``integer``
Example
* ``"2019-01-01T00:00:00"``
* ``"2019"`` with ``"%Y"`` as `date-format`_
Expand All @@ -3179,10 +3180,28 @@ Description
* If given as ``integer``, it is interpreted as UTC timestamp.


Duration
--------
Type
* ``float``
* ``list`` with 2 ``floats``
Example
* ``2.85``
* ``[1.5, 3.0]``
Description
A |Duration|_ represents a span of time in seconds.

* If given as a single ``float``, it will be used as that exact value.
* If given as a ``list`` with 2 floating-point numbers ``a`` & ``b``,
a value ``N`` will be randomly chosen with uniform distribution such that ``a <= N <= b``.
(see `random.uniform() <https://docs.python.org/3/library/random.html#random.uniform>`_)


Path
----
Type
``string`` or ``list`` of ``strings``
* ``string``
* ``list`` of ``strings``
Example
* ``"file.ext"``
* ``"~/path/to/file.ext"``
Expand Down Expand Up @@ -3328,6 +3347,7 @@ Description
.. |datetime| replace:: ``datetime``
.. |datetime.max| replace:: ``datetime.max``
.. |Date| replace:: ``Date``
.. |Duration| replace:: ``Duration``
.. |Path| replace:: ``Path``
.. |Last-Modified| replace:: ``Last-Modified``
.. |Logging Configuration| replace:: ``Logging Configuration``
Expand Down
12 changes: 6 additions & 6 deletions gallery_dl/extractor/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,13 @@ def __init__(self, match):
self._retries = self.config("retries", 4)
self._timeout = self.config("timeout", 30)
self._verify = self.config("verify", True)
self.request_interval = self.config(
"sleep-request", self.request_interval)
self._interval = util.build_duration_func(
self.config("sleep-request", self.request_interval),
self.request_interval_min,
)

if self._retries < 0:
self._retries = float("inf")
if self.request_interval < self.request_interval_min:
self.request_interval = self.request_interval_min

self._init_session()
self._init_cookies()
Expand Down Expand Up @@ -114,8 +114,8 @@ def request(self, url, *, method="GET", session=None, retries=None,
response = None
tries = 1

if self.request_interval:
seconds = (self.request_interval -
if self._interval:
seconds = (self._interval() -
(time.time() - Extractor.request_timestamp))
if seconds > 0.0:
self.log.debug("Sleeping for %.5s seconds", seconds)
Expand Down
15 changes: 8 additions & 7 deletions gallery_dl/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@ def run(self):
log = extractor.log
msg = None

sleep = extractor.config("sleep-extractor")
sleep = util.build_duration_func(extractor.config("sleep-extractor"))
if sleep:
time.sleep(sleep)
time.sleep(sleep())

try:
for msg in extractor:
Expand Down Expand Up @@ -236,7 +236,7 @@ def handle_url(self, url, kwdict):
return

if self.sleep:
time.sleep(self.sleep)
time.sleep(self.sleep())

# download from URL
if not self.download(url):
Expand Down Expand Up @@ -398,7 +398,7 @@ def initialize(self, kwdict=None):
if kwdict:
pathfmt.set_directory(kwdict)

self.sleep = cfg("sleep")
self.sleep = util.build_duration_func(cfg("sleep"))
self.fallback = cfg("fallback", True)
if not cfg("download", True):
# monkey-patch method to do nothing and always return True
Expand Down Expand Up @@ -541,7 +541,7 @@ def handle_url(self, url, kwdict):
self.pathfmt.set_filename(kwdict)
self.out.skip(self.pathfmt.path)
if self.sleep:
time.sleep(self.sleep)
time.sleep(self.sleep())
if self.archive:
self.archive.add(kwdict)

Expand Down Expand Up @@ -695,9 +695,10 @@ def __init__(self, url, parent=None, file=sys.stdout, ensure_ascii=True):
self.filter = util.identity if private else util.filter_dict

def run(self):
sleep = self.extractor.config("sleep-extractor")
sleep = util.build_duration_func(
self.extractor.config("sleep-extractor"))
if sleep:
time.sleep(sleep)
time.sleep(sleep())

# collect data
try:
Expand Down
18 changes: 18 additions & 0 deletions gallery_dl/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,24 @@ def compile_expression(expr, name="<expr>", globals=GLOBALS):
return functools.partial(eval, code_object, globals)


def build_duration_func(duration, min=0.0):
    """Build a zero-argument callable that yields a duration in seconds.

    Arguments:
        duration: Either a single number, or an iterable of exactly two
            numbers ``(lower, upper)`` describing a range. A falsy value
            (``None``, ``0``, empty list) means "no duration configured".
        min: Lower bound applied to every value derived from 'duration'.

    Returns:
        - ``None`` if 'duration' is falsy and 'min' is falsy as well,
          so callers can skip sleeping with a simple truth test.
        - A callable always returning 'min' if 'duration' is falsy but a
          non-zero minimum is required.
        - A callable always returning ``max(duration, min)`` for a
          single number.
        - A callable returning ``random.uniform(lower, upper)`` for a
          2-element range, with both bounds raised to at least 'min'.

    Raises:
        ValueError: if 'duration' is iterable but does not contain
            exactly two elements (propagated from tuple unpacking).
    """
    if not duration:
        # An unset or zero duration must not disable an enforced minimum.
        if min:
            return lambda: min
        return None

    try:
        lower, upper = duration
    except TypeError:
        # Not iterable: 'duration' is a single number.
        seconds = duration if duration > min else min
        return lambda: seconds

    return functools.partial(
        random.uniform,
        lower if lower > min else min,
        upper if upper > min else min,
    )


def build_predicate(predicates):
if not predicates:
return lambda url, kwdict: True
Expand Down

0 comments on commit c9e6693

Please sign in to comment.